I provide here further evolutionary transcriptomics analyses that
informed the work, most of which did not make into the main figures,
e.g. denoised TAI, pTAI analysis and GO analyses. Note: permutations
here are set at 500 rather than 50000 to ensure a fast run. In case you
are wondering what TAI is, as mentioned in
2. Evolutionary transcriptomics analyses, it stands for
“transcriptome age index” (see the R package myTAI for
further details).
noisyRTesting TAI using the dataset that has been denoised by
noisyr. The default settings were used
the selected similarity metric is correlation_pearson,
similarity.threshold = 0.25 &
method.chosen = Boxplot-IQR.
Note: a lot of genes are removed through this analysis.
library(tidyverse)
library(myTAI)
The PES that was constructed using denoised data is already made. I
am only using the raw TPM, sqrt(TPM) transform
and log2(TPM+1) transform because I don’t think the
rank and the rlog transformations are
necessary for this datatype.
The only issue I see here would be oversampling the permutations.
Non-transformed
Fd_PES <-
readr::read_csv(file = "data/Fd_PES_denoised.csv", show_col_types = FALSE)
Fd_PES_F <-
readr::read_csv(file = "data/Fd_PES_F_denoised.csv", show_col_types = FALSE)
Fd_PES_M <-
readr::read_csv(file = "data/Fd_PES_M_denoised.csv", show_col_types = FALSE)
Fs_PES <-
readr::read_csv(file = "data/Fs_PES_denoised.csv", show_col_types = FALSE)
Fs_PES_F <-
readr::read_csv(file = "data/Fs_PES_F_denoised.csv", show_col_types = FALSE)
Fs_PES_M <-
readr::read_csv(file = "data/Fs_PES_M_denoised.csv", show_col_types = FALSE)
sqrt-tranformed
Fd_PES.sqrt <-
readr::read_csv(file = "data/Fd_PES.sqrt_denoised.csv", show_col_types = FALSE)
Fd_PES_F.sqrt <-
readr::read_csv(file = "data/Fd_PES_F.sqrt_denoised.csv", show_col_types = FALSE)
Fd_PES_M.sqrt <-
readr::read_csv(file = "data/Fd_PES_M.sqrt_denoised.csv", show_col_types = FALSE)
Fs_PES.sqrt <-
readr::read_csv(file = "data/Fs_PES.sqrt_denoised.csv", show_col_types = FALSE)
Fs_PES_F.sqrt <-
readr::read_csv(file = "data/Fs_PES_F.sqrt_denoised.csv", show_col_types = FALSE)
Fs_PES_M.sqrt <-
readr::read_csv(file = "data/Fs_PES_M.sqrt_denoised.csv", show_col_types = FALSE)
log2-tranformed
Fd_PES.log2 <-
readr::read_csv(file = "data/Fd_PES.log2_denoised.csv", show_col_types = FALSE)
Fd_PES_F.log2 <-
readr::read_csv(file = "data/Fd_PES_F.log2_denoised.csv", show_col_types = FALSE)
Fd_PES_M.log2 <-
readr::read_csv(file = "data/Fd_PES_M.log2_denoised.csv", show_col_types = FALSE)
Fs_PES.log2 <-
readr::read_csv(file = "data/Fs_PES.log2_denoised.csv", show_col_types = FALSE)
Fs_PES_F.log2 <-
readr::read_csv(file = "data/Fs_PES_F.log2_denoised.csv", show_col_types = FALSE)
Fs_PES_M.log2 <-
readr::read_csv(file = "data/Fs_PES_M.log2_denoised.csv", show_col_types = FALSE)
I had previously used a function to combine males and females into one TAI plot.
#TI stands for transcriptome index
TI.preplot <- function(ExpressionSet, permutations = 500){
std <-
myTAI::bootMatrix(
ExpressionSet = ExpressionSet,
permutations = permutations) %>%
apply(2, stats::sd)
TI.out <-
myTAI::TAI(ExpressionSet) %>%
tibble::as_tibble(rownames = "Stage", colnames = c("TAI", "PS")) %>%
dplyr::bind_cols(as_tibble(std)) %>%
dplyr::rename(TAI = 2, sd = 3)
return(TI.out)
}
# function to process Fd. This will be changed to accommodate more seq data.
get_TAI_Fd <- function(PES_all, PES_M, PES_F, ordered_stages){
TAI_b <-
TI.preplot(
dplyr::select(PES_all, !c("gamete"))) %>%
dplyr::mutate(Sex = "Mixed")
stages_to_NA <- # this will differ for Fucus serratus
ordered_stages[-c(1, 1+1)]
TAI_M <-
TI.preplot(
PES_M) %>%
tibble::add_row(
dplyr::filter(
dplyr::select(TAI_b, !Sex),
Stage == ordered_stages[1+1])) %>%
tibble::add_row(
tibble::tibble(Stage = stages_to_NA, TAI = NA, sd = NA)
) %>%
dplyr::mutate(Sex = "Male")
TAI_F <-
TI.preplot(
PES_F) %>%
tibble::add_row(
dplyr::filter(
dplyr::select(TAI_b, !Sex),
Stage == ordered_stages[1+1])) %>%
tibble::add_row(
tibble::tibble(Stage = stages_to_NA, TAI = NA, sd = NA)
) %>%
dplyr::mutate(Sex = "Female") # This is not true - this is needed for the plotting.
TAI_out <-
dplyr::bind_rows(TAI_b, TAI_M, TAI_F)
TAI_out$Stage <- base::factor(TAI_out$Stage, ordered_stages)
return(TAI_out)
}
ordered_stages <- colnames(Fd_PES)[3:ncol(Fd_PES)]
Fd_TAI <-
get_TAI_Fd(
PES_all = Fd_PES,
PES_M = Fd_PES_M,
PES_F = Fd_PES_F,
ordered_stages)
Fd_TAI.log2 <-
get_TAI_Fd(
PES_all = Fd_PES.log2,
PES_M = dplyr::rename(Fd_PES_M.log2, gamete = V1),
PES_F = dplyr::rename(Fd_PES_F.log2, gamete = V1),
ordered_stages)
Fd_TAI.sqrt <-
get_TAI_Fd(
PES_all = Fd_PES.sqrt,
PES_M = dplyr::rename(Fd_PES_M.sqrt, gamete = V1),
PES_F = dplyr::rename(Fd_PES_F.sqrt, gamete = V1),
ordered_stages)
# function to process Fd. This will be changed to accommodate more seq data.
get_TAI_Fs <- function(PES_all, PES_M, PES_F, ordered_stages){
TAI_b <-
TI.preplot(
dplyr::select(PES_all, !c("gamete","matSP"))) %>%
dplyr::mutate(Sex = "Mixed")
stages_to_NA <- # this will differ for Fucus distichus
ordered_stages[-c(1, 1+1, length(ordered_stages) - 1, length(ordered_stages))]
TAI_M <-
TI.preplot(
PES_M) %>%
tibble::add_row(
dplyr::filter(
dplyr::select(TAI_b, !Sex),
Stage %in% c(
ordered_stages[1+1],
ordered_stages[length(ordered_stages) - 1]))) %>%
tibble::add_row(
tibble::tibble(Stage = stages_to_NA, TAI = NA, sd = NA)
) %>%
dplyr::mutate(Sex = "Male")
TAI_F <-
TI.preplot(
PES_F) %>%
tibble::add_row(
dplyr::filter(
dplyr::select(TAI_b, !Sex),
Stage %in% c(
ordered_stages[1+1],
ordered_stages[length(ordered_stages) - 1]))) %>%
tibble::add_row(
tibble::tibble(Stage = stages_to_NA, TAI = NA, sd = NA)
) %>%
dplyr::mutate(Sex = "Female") # This is not true. this is needed for the plotting.
TAI_out <-
dplyr::bind_rows(TAI_b, TAI_M, TAI_F)
TAI_out$Stage <- base::factor(TAI_out$Stage, ordered_stages)
return(TAI_out)
}
ordered_stages <- colnames(Fs_PES)[3:ncol(Fs_PES)]
Fs_TAI <-
get_TAI_Fs(
PES_all = Fs_PES,
PES_M = Fs_PES_M,
PES_F = Fs_PES_F,
ordered_stages)
Fs_TAI.log2 <-
get_TAI_Fs(
PES_all = Fs_PES.log2,
PES_M = Fs_PES_M.log2,
PES_F = Fs_PES_F.log2,
ordered_stages)
Fs_TAI.sqrt <-
get_TAI_Fs(
PES_all = Fs_PES.sqrt,
PES_M = Fs_PES_M.sqrt,
PES_F = Fs_PES_F.sqrt,
ordered_stages)
Fs_TAI %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = Sex,
colour = Sex,
fill = Sex)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
), alpha = 0.1) +
ggplot2::geom_line(size = 2, lineend = "round") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus serratus",
subtitle = "TPM")
Fs_TAI.sqrt %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = Sex,
colour = Sex,
fill = Sex)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
), alpha = 0.1) +
ggplot2::geom_line(size = 2, lineend = "round") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus serratus",
subtitle = "sqrt(TPM)")
Fs_TAI.log2 %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = Sex,
colour = Sex,
fill = Sex)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
), alpha = 0.1) +
ggplot2::geom_line(size = 2, lineend = "round") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus serratus",
subtitle = "log2(TPM+\u03b1)")
Fd_TAI %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = Sex,
colour = Sex,
fill = Sex)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
), alpha = 0.1) +
ggplot2::geom_line(size = 2, lineend = "round") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus distichus",
subtitle = "TPM")
Fd_TAI.sqrt %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = Sex,
colour = Sex,
fill = Sex)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
), alpha = 0.1) +
ggplot2::geom_line(size = 2, lineend = "round") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus distichus",
subtitle = "sqrt(TPM)")
Fd_TAI.log2 %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = Sex,
colour = Sex,
fill = Sex)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
), alpha = 0.1) +
ggplot2::geom_line(size = 2, lineend = "round") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus distichus",
subtitle = "log2(TPM+\u03b1)")
F. serratus
Fs_PES_tS <-
tfStability(
dplyr::select(Fs_PES, !c(gamete, matSP)),
transforms = c("none", "sqrt", "log2", "rank"),
permutations = 500
)
Fs_PES_tS_processed <-
tibble::as_tibble(Fs_PES_tS, rownames = "transformation")
Fs_PES_tS_res <-
Fs_PES_tS_processed %>%
dplyr::rename("pval" = value) %>%
dplyr::mutate(test = "FlatLineTest")
F. distichus
Fd_PES_tS <-
tfStability(
dplyr::select(Fd_PES, !c(gamete, matSP)),
transforms = c("none", "sqrt", "log2", "rank"),
permutations = 500
)
Fd_PES_tS_processed <-
tibble::as_tibble(Fd_PES_tS, rownames = "transformation")
Fd_PES_tS_res <-
Fd_PES_tS_processed %>%
dplyr::rename("pval" = value) %>%
dplyr::mutate(test = "FlatLineTest")
Fucus serratus development
Fs_PES_tS_res %>%
ggplot2::ggplot(
aes(
y = -log10(pval),
x = transformation)) +
ggplot2::geom_col(width = 0.05) +
ggplot2::geom_point(size = 5) +
ggplot2::geom_hline(
yintercept = -log10(0.05),
colour = "#E64B35FF",
linetype = 'dashed',
size = 2
) +
ggplot2::labs(
title = "Fucus serratus",
subtitle = "FlatLineTest",
x = "Transformation"
) +
ggplot2::coord_flip() +
ggplot2::theme_classic()
# save plot
# ggplot2::ggsave(filename = "Fs_TAI_denoised_embryo_FLT.pdf", device = "pdf", width = 3, height = 2.5)
Fucus distichus development
Fd_PES_tS_res %>%
ggplot2::ggplot(
aes(
y = -log10(pval),
x = transformation)) +
ggplot2::geom_col(width = 0.05) +
ggplot2::geom_point(size = 5) +
ggplot2::geom_hline(
yintercept = -log10(0.05),
colour = "#E64B35FF",
linetype = 'dashed',
size = 2
) +
ggplot2::labs(
title = "Fucus distichus",
subtitle = "FlatLineTest",
x = "Transformation"
) +
ggplot2::coord_flip() +
ggplot2::theme_classic()
# save plot
# ggplot2::ggsave(filename = "Fd_TAI_denoised_embryo_FLT.pdf", device = "pdf", width = 3, height = 2.5)
Fs_PES_tS <-
tfStability(
dplyr::select(Fs_PES, !c(gamete, matSP)),
TestStatistic = "ReductiveHourglassTest",
transforms = c("none", "sqrt", "log2", "rank"),
modules = list(early = 1:2, mid = 3:4, late = 5),
permutations = 500
)
Fs_PES_tS_processed <-
tibble::as_tibble(Fs_PES_tS, rownames = "transformation")
Fs_PES_tS_res <-
Fs_PES_tS_processed %>%
dplyr::rename("pval" = value) %>%
dplyr::mutate(test = "ReductiveHourglassTest")
Fd_PES_tS <-
tfStability(
dplyr::select(Fd_PES, !c(gamete, matSP)),
TestStatistic = "ReductiveHourglassTest",
transforms = c("none", "sqrt", "log2", "rank"),
modules = list(early = 1:4, mid = 5, late = 6),
permutations = 500
)
Fd_PES_tS_processed <-
tibble::as_tibble(Fd_PES_tS, rownames = "transformation")
Fd_PES_tS_res <-
Fd_PES_tS_processed %>%
dplyr::rename("pval" = value) %>%
dplyr::mutate(test = "ReductiveHourglassTest")
Fucus serratus development
Fs_PES_tS_res %>%
ggplot2::ggplot(
aes(
y = -log10(pval),
x = transformation)) +
ggplot2::geom_col(width = 0.05) +
ggplot2::geom_point(size = 5) +
ggplot2::geom_hline(
yintercept = -log10(0.05),
colour = "#E64B35FF",
linetype = 'dashed',
size = 2
) +
ggplot2::labs(
title = "Fucus serratus",
subtitle = "ReductiveHourglassTest",
x = "Transformation"
) +
ggplot2::coord_flip() +
ggplot2::theme_classic()
# save plot
# ggplot2::ggsave(filename = "Fs_TAI_denoised_embryo_RHT.pdf", device = "pdf", width = 3, height = 2.5)
Fucus distichus development
Fd_PES_tS_res %>%
ggplot2::ggplot(
aes(
y = -log10(pval),
x = transformation)) +
ggplot2::geom_col(width = 0.05) +
ggplot2::geom_point(size = 5) +
ggplot2::geom_hline(
yintercept = -log10(0.05),
colour = "#E64B35FF",
linetype = 'dashed',
size = 2
) +
ggplot2::labs(
title = "Fucus distichus",
subtitle = "ReductiveHourglassTest",
x = "Transformation"
) +
ggplot2::coord_flip() +
ggplot2::theme_classic()
# y = -log10(0.05) + max(Fd_PES_tS_res)/9,
# or perhaps we can use raw Pvalues
# save plot
# ggplot2::ggsave(filename = "Fd_TAI_denoised_embryo_RHT.pdf", device = "pdf", width = 3, height = 2.5)
#Fs
Fs_TAI %>%
dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus serratus",
subtitle = "TPM")
Fs_TAI.sqrt %>%
dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus serratus",
subtitle = "sqrt(TPM)")
# save plot
# ggplot2::ggsave(filename = "Fs_TAI_denoised_embryo.pdf", device = "pdf", width = 4.5, height = 2.5)
Fs_TAI.log2 %>%
dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus serratus",
subtitle = "log2(TPM+\u03b1)")
# Fd
Fd_TAI %>%
dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus distichus",
subtitle = "TPM")
Fd_TAI.sqrt %>%
dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus distichus",
subtitle = "sqrt(TPM)")
# save plot
# ggplot2::ggsave(filename = "Fd_TAI_denoised_embryo.pdf", device = "pdf", width = 4.5, height = 2.5)
Fd_TAI.log2 %>%
dplyr::filter(!Stage %in% c("gamete", "matSP") & Sex == "Mixed") %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus distichus",
subtitle = "log2(TPM+\u03b1)")
We examine here whether there is a significant difference between the TAI of male and female strains of Ectocarpus.
library(tidyverse)
library(myTAI)
Non-transformed
Ec_PES_32m <-
readr::read_csv(file = "data/Ec_PES_32m.csv", show_col_types = FALSE)
Ec_PES_25f <-
readr::read_csv(file = "data/Ec_PES_25f.csv", show_col_types = FALSE)
sqrt-transformed
Ec_PES_32m.sqrt <-
readr::read_csv(file = "data/Ec_PES_32m.sqrt.csv", show_col_types = FALSE)
Ec_PES_25f.sqrt <-
readr::read_csv(file = "data/Ec_PES_25f.sqrt.csv", show_col_types = FALSE)
log2-transformed
Ec_PES_32m.log2 <-
readr::read_csv(file = "data/Ec_PES_32m.log2.csv", show_col_types = FALSE)
Ec_PES_25f.log2 <-
readr::read_csv(file = "data/Ec_PES_25f.log2.csv", show_col_types = FALSE)
rank-transformed
Ec_PES_32m.rank <-
readr::read_csv(file = "data/Ec_PES_32m.rank.csv", show_col_types = FALSE)
Ec_PES_25f.rank <-
readr::read_csv(file = "data/Ec_PES_25f.rank.csv", show_col_types = FALSE)
rlog-tranformed
Ec_PES_32m.rlog <-
readr::read_csv(file = "data/Ec_PES_32m.rlog.csv", show_col_types = FALSE)
Ec_PES_25f.rlog <-
readr::read_csv(file = "data/Ec_PES_25f.rlog.csv", show_col_types = FALSE)
# calculate p-values based on the flat line test, then adjust for multiple comparisons.
hack_flt <- function(PES_m, PES_f, permutations = 500, tr_name = "", padj = F){
pval_df <- data.frame()
for(i in colnames(PES_m)[-c(1,2)]){
join_test <- dplyr::left_join(
dplyr::select(PES_m, 1, 2, starts_with(i)),
dplyr::select(PES_f, 1, 2, starts_with(i)),
by = c("GeneID", "PS"))
# join_test_df <- rbind(join_test)
# where x is male and y is female
pval <- myTAI::FlatLineTest(join_test, permutations = permutations)
pval_df <-
dplyr::bind_rows(
pval_df,
tibble::tibble(
stage = i,
p.value = pval$p.value,
transformation = tr_name))
}
pval_df$stage <- base::factor(pval_df$stage, colnames(PES_m)[-c(1,2)])
if(!padj){return(pval_df)}
# to adjust p values
p_value <- pval_df$p.value
# Apply Bonferroni correction to the p-value
p_adjusted <- p.adjust(p_value, method = "bonferroni")
pval_df$padj <- p_adjusted
return(pval_df)
}
permutations = 500
Ec_PES_perm <-
dplyr::bind_rows(
hack_flt(Ec_PES_32m, Ec_PES_25f, tr_name = "none", padj = T),
hack_flt(Ec_PES_32m.sqrt, Ec_PES_25f.sqrt, tr_name = "sqrt", padj = T),
hack_flt(Ec_PES_32m.log2, Ec_PES_25f.log2, tr_name = "log2", padj = T),
hack_flt(Ec_PES_32m.rank, Ec_PES_25f.rank, tr_name = "rank", padj = T),
hack_flt(Ec_PES_32m.rlog, Ec_PES_25f.rlog, tr_name = "rlog", padj = T)
) %>%
dplyr::mutate(test = "permutation test")
##
## Total runtime of your permutation test: 0.162 seconds.
## Total runtime of your permutation test: 0.15 seconds.
## Total runtime of your permutation test: 0.15 seconds.
## Total runtime of your permutation test: 0.145 seconds.
## Total runtime of your permutation test: 0.15 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.152 seconds.
## Total runtime of your permutation test: 0.145 seconds.
## Total runtime of your permutation test: 0.15 seconds.
## Total runtime of your permutation test: 0.148 seconds.
## Total runtime of your permutation test: 0.15 seconds.
## Total runtime of your permutation test: 0.148 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.148 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.148 seconds.
## Total runtime of your permutation test: 0.151 seconds.
## Total runtime of your permutation test: 0.148 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.148 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.148 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.149 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.148 seconds.
## Total runtime of your permutation test: 0.15 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.149 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.147 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.146 seconds.
## Total runtime of your permutation test: 0.146 seconds.
alpha = 2.22e-16 # so we do not get infinite values
Ec_PES_perm %>%
ggplot2::ggplot(
aes(
y = -log10(padj),
x = stage,
group = transformation,
linetype = transformation)) +
ggplot2::geom_line(width = 0.05, alpha = 0.5) +
ggplot2::geom_point(size = 5) +
ggplot2::geom_hline(
yintercept = -log10(0.05),
colour = "#E64B35FF",
linetype = 'dashed',
size = 1
) +
ggplot2::labs(
title = "Ectocarpus",
subtitle = "Permutation test; TAI male > female",
x = "Stage",
y = "-log10(p_adjusted)"
) +
ggplot2::coord_flip() +
ggplot2::theme_classic()
## Warning in ggplot2::geom_line(width = 0.05, alpha = 0.5): Ignoring unknown
## parameters: `width`
ggplot2::ggsave(filename = "Ec_sex_permutation_test.pdf", device = "pdf", width = 6, height = 4)
Ec_PES_perm %>%
dplyr::mutate(
pval_label = case_when(
padj > 0.05 ~ "ns",
padj <= 0.05 & padj > 0.01 ~ "*",
padj <= 0.01 & padj > 0.001 ~ "**",
padj <= 0.001 ~ "***",
.default = NA
)
) %>%
ggplot2::ggplot(
aes(
y = stage,
label = pval_label,
x = transformation,
fill = -log10(padj + alpha))) +
ggplot2::geom_tile(colour = "black", size = 1) +
ggplot2::geom_text() +
ggplot2::theme_void() +
ggplot2::theme(legend.position = "",
axis.title.y = element_text(angle = 90),
axis.text.y = element_text(),
axis.text.x = element_text()) +
ggplot2::coord_flip() +
ggplot2::scale_fill_steps2(limits = c(0,3),
low = "white", high = "#E64B35FF") +
ggplot2::labs(title = "Fucus serratus",
subtitle = "Male TAI & female TAI differ?")
Tau is used here to calculate stage specificity
library(tidyverse)
library(myTAI)
specificity <- function(PES, measure = "tau", drop_Na = T){
n_expcol <- ncol(PES) - 2 # e.g. stages or tissue
ExpressionSet <- PES[,-1]
if (measure == "tau") {
norm_expression <- t(apply(
ExpressionSet[, -1], # geneID
1,
function(row) row / max(row))) # normalise expression
specificity_index <- data.frame(
# calculate Tau and return ExpressionSet with tau
tau = apply(
norm_expression,
1,
function(row) sum(1 - row) / (n_expcol-1)),
GeneID = ExpressionSet[, 1])
}
if (drop_Na) {
specificity_index <- na.omit(specificity_index)
}
return(specificity_index)
}
Non-transformed
Fd_PES <-
readr::read_csv(file = "data/Fd_PES.csv") %>% select(-c(gamete,matSP))
Fs_PES <-
readr::read_csv(file = "data/Fs_PES.csv") %>% select(-c(gamete,matSP))
sqrt-tranformed
Fd_PES.sqrt <-
readr::read_csv(file = "data/Fd_PES.sqrt.csv") %>% select(-c(gamete,matSP))
Fs_PES.sqrt <-
readr::read_csv(file = "data/Fs_PES.sqrt.csv") %>% select(-c(gamete,matSP))
log2-tranformed
Fd_PES.log2 <-
readr::read_csv(file = "data/Fd_PES.log2.csv") %>% select(-c(gamete,matSP))
Fs_PES.log2 <-
readr::read_csv(file = "data/Fs_PES.log2.csv") %>% select(-c(gamete,matSP))
rank-tranformed
Fd_PES.rank <-
readr::read_csv(file = "data/Fd_PES.rank.csv") %>% select(-c(gamete,matSP))
Fs_PES.rank <-
readr::read_csv(file = "data/Fs_PES.rank.csv") %>% select(-c(gamete,matSP))
rlog-tranformed
Fd_PES.rlog <-
readr::read_csv(file = "data/Fd_PES.rlog.csv") %>% select(-c(gamete,matSP))
Fs_PES.rlog <-
readr::read_csv(file = "data/Fs_PES.rlog.csv") %>% select(-c(gamete,matSP))
Fd_PES_tau <- specificity(Fd_PES)
Fs_PES_tau <- specificity(Fs_PES)
Fd_PES_tau.sqrt <- specificity(Fd_PES.sqrt)
Fs_PES_tau.sqrt <- specificity(Fs_PES.sqrt)
Fd_PES_tau.log2 <- specificity(Fd_PES.log2)
Fs_PES_tau.log2 <- specificity(Fs_PES.log2)
Fd_PES_tau.rank <- specificity(Fd_PES.rank)
Fs_PES_tau.rank <- specificity(Fs_PES.rank)
hist(Fd_PES_tau$tau, breaks = 100)
hist(Fs_PES_tau$tau, breaks = 100)
hist(Fd_PES_tau.sqrt$tau, breaks = 100)
hist(Fs_PES_tau.sqrt$tau, breaks = 100)
hist(Fd_PES_tau.log2$tau, breaks = 100)
hist(Fs_PES_tau.log2$tau, breaks = 100)
hist(Fd_PES_tau.rank$tau, breaks = 100)
hist(Fs_PES_tau.rank$tau, breaks = 100)
This is analogous to divergence map
tau_map <- function(tauExpressionSet, n_quantile = 10){
GeneID <- tau_strata <- NULL
data.table::setDT(tauExpressionSet)
data.table::setkeyv(tauExpressionSet, c("tau", "GeneID"))
tau_tbl_tauMap <- data.table::as.data.table(dplyr::select(dtplyr::lazy_dt(tauExpressionSet),
tau, GeneID))
QuantileValues <- stats::quantile(tau_tbl_tauMap[, tau],
probs = seq(0, 1, (1/{
{
n_quantile
}
})), na.rm = TRUE)
if (any(duplicated(QuantileValues))) {
stop("ERROR: there is at least one duplicate threshold in the quantile analysis. Please select a lower value as n_quantile.")
}
else {
}
tau_tbl_tauMap$tau <- base::cut(tau_tbl_tauMap[, tau],
breaks = QuantileValues, include.lowest = TRUE, labels = FALSE)
data.table::setnames(tau_tbl_tauMap, old = "tau", new = "tau_strata")
return(tau_tbl_tauMap[, list(tau_strata, GeneID)])
}
Fd_PES_tau.quantile <- tau_map(Fd_PES_tau)
Fs_PES_tau.quantile <- tau_map(Fs_PES_tau)
Fd_PES_tau.sqrt.quantile <- tau_map(Fd_PES_tau.sqrt)
Fs_PES_tau.sqrt.quantile <- tau_map(Fs_PES_tau.sqrt)
Fd_PES_tau.log2.quantile <- tau_map(Fd_PES_tau.log2)
Fs_PES_tau.log2.quantile <- tau_map(Fs_PES_tau.log2)
Fd_PES_tau.rank.quantile <- tau_map(Fd_PES_tau.rank)
Fs_PES_tau.rank.quantile <- tau_map(Fs_PES_tau.rank)
dplyr::left_join(Fd_PES_tau, Fd_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
dplyr::left_join(Fs_PES_tau, Fs_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
dplyr::left_join(Fd_PES_tau.sqrt, Fd_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
dplyr::left_join(Fs_PES_tau.sqrt, Fs_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
dplyr::left_join(Fd_PES_tau.log2, Fd_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
dplyr::left_join(Fs_PES_tau.log2, Fs_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
# box plots instead
dplyr::left_join(Fd_PES_tau, Fd_PES) %>%
ggplot2::ggplot(aes(x = as.factor(PS), y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_boxplot(width = 0.2)
## Joining with `by = join_by(GeneID)`
dplyr::left_join(Fs_PES_tau, Fs_PES) %>%
ggplot2::ggplot(aes(x = as.factor(PS), y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_boxplot(width = 0.2)
## Joining with `by = join_by(GeneID)`
dplyr::left_join(Fd_PES_tau.sqrt, Fd_PES) %>%
ggplot2::ggplot(aes(x = as.factor(PS), y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_boxplot(width = 0.2)
## Joining with `by = join_by(GeneID)`
dplyr::left_join(Fs_PES_tau.sqrt, Fs_PES) %>%
ggplot2::ggplot(aes(x = as.factor(PS), y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_boxplot(width = 0.2)
## Joining with `by = join_by(GeneID)`
dplyr::left_join(Fd_PES_tau.log2, Fd_PES) %>%
ggplot2::ggplot(aes(x = as.factor(PS), y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_boxplot(width = 0.2)
## Joining with `by = join_by(GeneID)`
dplyr::left_join(Fs_PES_tau.log2, Fs_PES) %>%
ggplot2::ggplot(aes(x = as.factor(PS), y = tau)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_boxplot(width = 0.2)
## Joining with `by = join_by(GeneID)`
dplyr::left_join(Fd_PES_tau.quantile, Fd_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau_strata)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
dplyr::left_join(Fs_PES_tau.quantile, Fs_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau_strata)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
dplyr::left_join(Fd_PES_tau.sqrt.quantile, Fd_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau_strata)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm") +
ggplot2::labs(title = "Fd tau-strata",
subtitle = "sqrt")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
# ggplot2::ggsave(filename = "Fd_PS_tau.pdf", device = "pdf", width = 3, height = 3)
dplyr::left_join(Fs_PES_tau.sqrt.quantile, Fs_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau_strata)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm") +
ggplot2::labs(title = "Fs tau-strata",
subtitle = "sqrt")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
# ggplot2::ggsave(filename = "Fs_PS_tau.pdf", device = "pdf", width = 3, height = 3)
dplyr::left_join(Fd_PES_tau.log2.quantile, Fd_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau_strata)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
dplyr::left_join(Fs_PES_tau.log2.quantile, Fs_PES) %>%
ggplot2::ggplot(aes(x = PS, y = tau_strata)) +
ggplot2::geom_jitter(alpha = 0.1) +
ggplot2::geom_smooth(method = "lm")
## Joining with `by = join_by(GeneID)`
## `geom_smooth()` using formula = 'y ~ x'
corr_from_tauPES <- function(PES, PES_tau){
input_data <- dplyr::left_join(
PES_tau,
dplyr::select(PES, c(1, 2)), by = "GeneID")
return(stats::cor.test(input_data$PS, input_data$tau_strata))
}
corr_from_tauPES(PES_tau = Fd_PES_tau.quantile, PES = Fd_PES)
##
## Pearson's product-moment correlation
##
## data: input_data$PS and input_data$tau_strata
## t = 3.9535, df = 7893, p-value = 7.77e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02241887 0.06644955
## sample estimates:
## cor
## 0.0444558
corr_from_tauPES(PES_tau = Fs_PES_tau.quantile, PES = Fs_PES)
##
## Pearson's product-moment correlation
##
## data: input_data$PS and input_data$tau_strata
## t = 3.9597, df = 8133, p-value = 7.569e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02215486 0.06553341
## sample estimates:
## cor
## 0.04386481
corr_from_tauPES(PES_tau = Fd_PES_tau.sqrt.quantile, PES = Fd_PES)
##
## Pearson's product-moment correlation
##
## data: input_data$PS and input_data$tau_strata
## t = 4.2883, df = 7893, p-value = 1.822e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02618147 0.07019679
## sample estimates:
## cor
## 0.04821253
corr_from_tauPES(PES_tau = Fs_PES_tau.sqrt.quantile, PES = Fs_PES)
##
## Pearson's product-moment correlation
##
## data: input_data$PS and input_data$tau_strata
## t = 4.3566, df = 8133, p-value = 1.337e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02654927 0.06991026
## sample estimates:
## cor
## 0.0482525
corr_from_tauPES(PES_tau = Fd_PES_tau.log2.quantile, PES = Fd_PES)
##
## Pearson's product-moment correlation
##
## data: input_data$PS and input_data$tau_strata
## t = 11.133, df = 7893, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1025575 0.1459936
## sample estimates:
## cor
## 0.1243351
corr_from_tauPES(PES_tau = Fs_PES_tau.log2.quantile, PES = Fs_PES)
##
## Pearson's product-moment correlation
##
## data: input_data$PS and input_data$tau_strata
## t = 11.218, df = 8133, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1019871 0.1447872
## sample estimates:
## cor
## 0.1234446
The correlation between PS and tau_strata is very weak. Therefore, we have a statistically independent measure.
Fd_SpES <- myTAI::MatchMap(Fd_PES_tau.quantile, Fd_PES[-1])
Fs_SpES <- myTAI::MatchMap(Fs_PES_tau.quantile, Fs_PES[-1])
Fd_SpES.sqrt <- myTAI::MatchMap(Fd_PES_tau.sqrt.quantile, Fd_PES.sqrt[-1])
Fs_SpES.sqrt <- myTAI::MatchMap(Fs_PES_tau.sqrt.quantile, Fs_PES.sqrt[-1])
Fd_SpES.log2 <- myTAI::MatchMap(Fd_PES_tau.log2.quantile, Fd_PES.log2[-1])
Fs_SpES.log2 <- myTAI::MatchMap(Fs_PES_tau.log2.quantile, Fs_PES.log2[-1])
(Transcriptome Specificity Index). We should keep in mind that we are only retaining the embryo stages.
#TI stands for transcriptome index
TI.preplot <- function(ExpressionSet, permutations = 500){
std <-
myTAI::bootMatrix(
ExpressionSet = ExpressionSet,
permutations = permutations) %>%
apply(2, stats::sd)
TI.out <-
myTAI::TAI(ExpressionSet) %>%
tibble::as_tibble(rownames = "Stage", colnames = c("TAI", "PS")) %>%
dplyr::bind_cols(as_tibble(std)) %>%
dplyr::rename(TAI = 2, sd = 3)
return(TI.out)
}
get_TAI <- function(SpES, ordered_stages){
TAI <-
TI.preplot(SpES)
TAI_out <-
TAI
TAI_out$Stage <- base::factor(TAI_out$Stage, ordered_stages)
return(TAI_out)
}
get_TAI(Fd_SpES, ordered_stages = colnames(Fd_SpES)[-c(1,2)]) %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus distichus",
subtitle = "TPM",
y = "TSpI")
## New names:
## • `value` -> `value...2`
## • `value` -> `value...3`
get_TAI(Fs_SpES, ordered_stages = colnames(Fs_SpES)[-c(1,2)]) %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus serratus",
subtitle = "TPM",
y = "TSpI")
## New names:
## • `value` -> `value...2`
## • `value` -> `value...3`
# sqrt transformation
get_TAI(Fd_SpES.sqrt, ordered_stages = colnames(Fd_SpES)[-c(1,2)]) %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus distichus",
subtitle = "sqrt(TPM)",
y = "TSI")
## New names:
## • `value` -> `value...2`
## • `value` -> `value...3`
# ggplot2::ggsave(filename = "Fd_TSI_embryo.pdf", device = "pdf", width = 4.5, height = 2.5)
get_TAI(Fs_SpES.sqrt, ordered_stages = colnames(Fs_SpES)[-c(1,2)]) %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus serratus",
subtitle = "sqrt(TPM)",
y = "TSI")
## New names:
## • `value` -> `value...2`
## • `value` -> `value...3`
# ggplot2::ggsave(filename = "Fs_TSI_embryo.pdf", device = "pdf", width = 4.5, height = 2.5)
# log transformation
get_TAI(Fd_SpES.log2, ordered_stages = colnames(Fd_SpES)[-c(1,2)]) %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus distichus",
subtitle = "log2(TPM+1)",
y = "TSpI")
## New names:
## • `value` -> `value...2`
## • `value` -> `value...3`
get_TAI(Fs_SpES.log2, ordered_stages = colnames(Fs_SpES)[-c(1,2)]) %>%
ggplot2::ggplot(
aes(
y = TAI,
x = Stage,
group = 1)) +
ggplot2::geom_ribbon(
aes(ymin = TAI - sd,
ymax = TAI + sd
),
colour = "#00A087FF",
fill = "#00A087FF",
alpha = 0.1) +
ggplot2::geom_line(
size = 2,
lineend = "round",
colour = "#00A087FF") +
theme_classic() +
ggsci::scale_colour_npg() +
labs(title = "Fucus serratus",
subtitle = "log2(TPM+1)",
y = "TSpI")
## New names:
## • `value` -> `value...2`
## • `value` -> `value...3`
# Fucus serratus
Fs_SpES_tS <-
tfStability(
Fs_SpES,
transforms = c("none", "sqrt", "log2", "rank"),
permutations = 500
)
Fs_SpES_tS_processed <-
tibble::as_tibble(Fs_SpES_tS, rownames = "transformation") %>%
dplyr::rename("pval" = value) %>%
dplyr::mutate(test = "FlatLineTest")
# Fucus distichus
Fd_SpES_tS <-
tfStability(
Fd_SpES,
transforms = c("none", "sqrt", "log2", "rank"),
permutations = 500
)
Fd_SpES_tS_processed <-
tibble::as_tibble(Fd_SpES_tS, rownames = "transformation") %>%
dplyr::rename("pval" = value) %>%
dplyr::mutate(test = "FlatLineTest")
Fs_SpES_tS_processed %>%
ggplot2::ggplot(
aes(
y = -log10(pval),
x = transformation)) +
ggplot2::geom_col(width = 0.05) +
ggplot2::geom_point(size = 5) +
ggplot2::geom_hline(
yintercept = -log10(0.05),
colour = "#E64B35FF",
linetype = 'dashed',
size = 1
) +
ggplot2::labs(
title = "Fucus serratus",
subtitle = "FlatLineTest",
x = "Transformation"
) +
ggplot2::coord_flip() +
ggplot2::theme_classic()
Fd_SpES_tS_processed %>%
ggplot2::ggplot(
aes(
y = -log10(pval),
x = transformation)) +
ggplot2::geom_col(width = 0.05) +
ggplot2::geom_point(size = 5) +
ggplot2::geom_hline(
yintercept = -log10(0.05),
colour = "#E64B35FF",
linetype = 'dashed',
size = 1
) +
ggplot2::labs(
title = "Fucus distichus",
subtitle = "FlatLineTest",
x = "Transformation"
) +
ggplot2::coord_flip() +
ggplot2::theme_classic()
In case you’re wondering, TDI stands for
transcriptome age index.
The R package Orthologr (see documentation) was used
to obtain the dNdS ratio for each gene.
library(orthologr)
Ec_dNdS <- dNdS(
query_file ="/mnt/projects/TDI/06__FINAL_CDS/PUBLIC_Ectocarpus-sp7_CDS-gff.fa",
subject_file ="/mnt/projects/TDI/06__FINAL_CDS/PUBLIC_Ectocarpus-subulatus_CDS-gff.fa",
ortho_detection ="RBH",
aa_aln_type ="pairwise",
aa_aln_tool ="NW",
codon_aln_tool ="pal2nal",
dnds_est.method ="Comeron",
comp_cores =1)
write.table(x = Ec_dNdS,
file = "Ec7_Ecs_dNdS.csv",
sep = ";",
col.names = TRUE,
row.names = FALSE,
quote = FALSE)
And from there compute the divergence stratum for each gene (where 1
means lowest divergence and 10 means highest divergence). In fact, one
can already use the orthologr::divergence_stratigraphy()
but I wanted to check how the dNdS ratios were distributed.
Ec_dNdS <- readr::read_csv2("data/Ec7_Ecs_dNdS.csv") %>%
drop_na() %>%
dplyr::mutate(GeneID = query_id) %>%
dplyr::mutate(dN = as.numeric(dN),
dS = as.numeric(dS),
dNdS = as.numeric(dNdS)) %>%
dplyr::ungroup()
Ec_dNdS_temp <-
filter(Ec_dNdS, dNdS <= 2) %>% drop_na()
Ec_dNdS_decval <-
stats::quantile(
Ec_dNdS_temp[ , "dNdS"],
probs = seq(0.0, 1, 0.1),
na.rm = TRUE)
Ec_dNdS_quintiles <- cut(Ec_dNdS_temp$dNdS, breaks = Ec_dNdS_decval, include.lowest = TRUE, labels = FALSE)
Ec_dNdS_temp$DS <- Ec_dNdS_quintiles
Ec_deciles <- Ec_dNdS_temp %>% dplyr::select(DS, GeneID)
rm(Ec_dNdS_temp)
Ec_deciles %>%
ggplot2::ggplot(aes(DS)) +
ggplot2::geom_bar() +
ggplot2::labs(
title = "DS distribution",
subtitle = "Ec7 vs Ec. siliculosus",
x = "DS"
)
Ec_abundance <-
readr::read_csv("data/Ec_abundance.csv")
Ec_abundance_25f <-
Ec_abundance %>%
dplyr::select(1 | dplyr::starts_with("F_"))
Ec_abundance_32m <-
Ec_abundance %>%
dplyr::select(1 | dplyr::starts_with("M_"))
sample_info_ec <-
readr::read_csv("data/sample_info_ec.csv")
construct_DES <-
function(
ExpressionMatrix,
DivergenceMap,
metadata,
AVG = median,
grouping_var = "Stage",
name_var = "LibName",
transform_opt = F,
transformation){
RepCol <-
metadata %>%
dplyr::group_by(.data[[grouping_var]]) %>%
dplyr::summarize(n=dplyr::n()) %>%
dplyr::arrange(
base::factor(
.data[[grouping_var]],
levels = base::unique(metadata[,grouping_var][[1]])
)
) %>%
base::as.vector()
# making sure the metadata and the expression set match
DES <-
ExpressionMatrix %>%
dplyr::left_join(
dplyr::select(
DivergenceMap,
c(GeneID, DS)
)
) %>%
tidyr::drop_na() %>% # NAs can ruin the transformation.
dplyr::relocate(DS, GeneID)
if(transform_opt){
if(transformation == "rlog"){
DES <-
myTAI::tf(
DES,
FUN = rlog,
integerise = TRUE)
}
else if(transformation == "vst"){
DES <-
myTAI::tf(
DES,
FUN = vst,
integerise = TRUE)
}
}
DES_FILT <-
DES %>%
dplyr::select(1:2 | metadata[,name_var][[1]])
OUT <- DES_FILT %>%
myTAI::CollapseReplicates(
RepCol$n,
AVG,
stage.names = RepCol[[grouping_var]])
return(OUT)
}
Ec_DES_25f <-
construct_DES(
Ec_abundance_25f,
Ec_deciles,
dplyr::filter(
sample_info_ec,
Sex == "F"),
name_var = "SampleName"
)
Ec_DES_32m <-
construct_DES(
Ec_abundance_32m,
Ec_deciles,
dplyr::filter(
sample_info_ec,
Sex == "M"),
name_var = "SampleName"
)
Ec_DES_32m.sqrt <-
myTAI::tf(
ExpressionSet = Ec_DES_32m,
FUN = sqrt)
Ec_DES_25f.sqrt <-
myTAI::tf(
ExpressionSet = Ec_DES_25f,
FUN = sqrt)
# save
Ec_DES_32m.sqrt %>%
readr::write_csv(file = "data/Ec_DES_32m.sqrt.csv")
Ec_DES_25f.sqrt %>%
readr::write_csv(file = "data/Ec_DES_25f.sqrt.csv")
library(scales)
library(see)
Ec_age <- readr::read_csv("data/Ec_age_processed.csv")
Fd_age <- readr::read_csv("data/Fd_age_processed.csv")
Fs_age <- readr::read_csv("data/Fs_age_processed.csv")
Kendall_dNdS <- Ec_dNdS %>%
filter(dNdS < 2, !dNdS == 0) %>%
left_join(Ec_age)
## Joining with `by = join_by(GeneID)`
Kendall_dNdS %>% drop_na() %>%
ggplot(aes(x = rank, y = dNdS, group = rank)) +
see::geom_violindot(dots_size = 0.05, binwidth = 0.02) +
labs(title = "Correlation between gene age and dNdS",
subtitle = "Ectocarpus",
x = "Gene age",
y = "dN/dS ratio\n(Ec7 vs Ec. siliculosus)") +
scale_x_continuous(breaks=c(1:max(Kendall_dNdS$rank, na.rm = TRUE))) +
theme_classic()
res<-cor.test(Kendall_dNdS$rank,Kendall_dNdS$dNdS, method="kendall")
res
##
## Kendall's rank correlation tau
##
## data: Kendall_dNdS$rank and Kendall_dNdS$dNdS
## z = 37.101, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## 0.2880629
Kendall_dNdS <- Ec_dNdS %>%
mutate(zero = case_when(
dNdS == 0 ~ "dNdS=0",
TRUE ~ "dNdS>0")) %>%
left_join(Ec_age)
## Joining with `by = join_by(GeneID)`
Kendall_dNdS %>% drop_na() %>%
ggplot(aes(x = rank, fill = zero)) +
ggplot2::geom_bar(colour = "black") +
labs(title = "Gene age and dNdS=0",
subtitle = "Ectocarpus",
x = "Gene age",
y = "Number of genes",
fill = "") +
scale_x_continuous(breaks=c(1:max(Kendall_dNdS$rank, na.rm = TRUE))) +
theme_classic() +
scale_fill_manual(values = c("#DC0000FF", "white"))
From DES (Divergence Expression Set), one can obtain the TDI profiles
through the mytTAI::TAI function. The myTAI
package, to my knowledge, does not enable the flexibility to plot the
separate sexes, for example in one plot. Therefore, I will not
use the myTAI::PlotSignature function.
library(tidyverse)
library(myTAI)
sqrt-tranformed
Ec_DES_32m.sqrt <-
readr::read_csv(file = "data/Ec_DES_32m.sqrt.csv", show_col_types = FALSE)
Ec_DES_25f.sqrt <-
readr::read_csv(file = "data/Ec_DES_25f.sqrt.csv", show_col_types = FALSE)
I had previously used a function to combine males and females into one TDI plot.
#TI stands for transcriptome index
TI.preplot <- function(ExpressionSet, permutations = 500){
std <-
myTAI::bootMatrix(
ExpressionSet = ExpressionSet,
permutations = permutations) %>%
apply(2, stats::sd)
TI.out <-
myTAI::TDI(ExpressionSet) %>%
tibble::as_tibble(rownames = "Stage", colnames = c("TDI", "PS")) %>%
dplyr::bind_cols(as_tibble(std)) %>%
dplyr::rename(TDI = 2, sd = 3)
return(TI.out)
}
# function to process Fd. This will be changed to accommodate more seq data.
get_TDI_Ec <- function(DES_M, DES_F, ordered_stages){
TDI_M <-
TI.preplot(DES_M) %>%
dplyr::mutate(Sex = "Male")
TDI_F <-
TI.preplot(DES_F) %>%
dplyr::mutate(Sex = "Female")
stages_to_NA <- # this will differ for Fucus distichus
ordered_stages[-c(1, 1+1, length(ordered_stages) - 1, length(ordered_stages))]
TDI_out <-
dplyr::bind_rows(TDI_M, TDI_F)
TDI_out$Stage <- base::factor(TDI_out$Stage, ordered_stages)
return(TDI_out)
}
ordered_stages <- colnames(Ec_DES_25f)[3:ncol(Ec_DES_25f)]
Ec_TDI.sqrt <-
get_TDI_Ec(
DES_M = Ec_DES_32m.sqrt,
DES_F = Ec_DES_25f.sqrt,
ordered_stages)
Ec_TDI.sqrt %>%
ggplot2::ggplot(
aes(
y = TDI,
x = Stage,
group = Sex,
colour = Sex,
fill = Sex)) +
ggplot2::geom_line(alpha = 0.4, linewidth = 1) +
ggplot2::geom_crossbar(
aes(ymin = TDI - sd,
ymax = TDI + sd
),
alpha = 0.5,
position = ggplot2::position_dodge(0.55),
width = 0.5) +
theme_classic() +
ggsci::scale_colour_npg() +
ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(n.dodge=2)) +
labs(title = "Ectocarpus",
subtitle = "sqrt(TPM)")
# ggplot2::ggsave(filename = "Ec_TDI_sqrt.pdf", device = "pdf", width = 6, height = 4)
Instead of relying on one-to-one orthologues, we incorporate
in-paralogues into comparative transcriptomics analysis. As I
have discussed in the article vignette of myTAI (see here),
this approach has some limitations, in that while not explicitely
transferring (potential) issues from the
orthologue conjecture, some elements of this assumption is
incoporated. Regardless, I think it still captures more information than
using one-to-one orthologues.
This is different to the original filter because with the original filter, we wanted to retain as much genes as possible for TAI analysis. Here, we want to minimise species difference.
library(tidyverse)
library(ggfortify)
sample_info_fd <-
readr::read_csv2("data/sample_info_fd.csv", show_col_types = FALSE)
sample_info_fs <-
readr::read_csv2("data/sample_info_fs.csv", show_col_types = FALSE)
sample_info_fd_embryo <-
sample_info_fd %>%
dplyr::filter(!Stage %in% c("gamete", "matSP")) %>%
dplyr::mutate(LibLab = str_c(Species, "_", Stage, "_", Replicate))
sample_info_fs_embryo <-
sample_info_fs %>%
dplyr::filter(!Stage %in% c("gamete", "matSP")) %>%
mutate(LibLab = str_c(Species, "_", Stage, "_", Replicate))
Fs_abundance <-
readr::read_csv(file = "data/Fs_OG_abundance.csv", show_col_types = FALSE)
Fd_abundance <-
readr::read_csv(file = "data/Fd_OG_abundance.csv", show_col_types = FALSE)
selectGenes <- function(counts, min.count=2){
keep <-
counts[rowMeans(counts)>min.count, ]
return(keep)
}
To minimise noise, I remove OGs with low counts.
combined_metatable <-
rbind(sample_info_fs_embryo, sample_info_fd_embryo) %>%
relocate(LibName)
merged_TPM_pre <-
merge(Fs_abundance, Fd_abundance, by = "OG") %>%
dplyr::select(1, combined_metatable$LibName)
merged_TPM <-
data.frame(row.names = merged_TPM_pre[,1], merged_TPM_pre[,-1], check.names = FALSE)
merged_TPM.filt <-
as.matrix(merged_TPM) %>%
selectGenes(min.count=10)
Through the filtering function, I retain 2889 / 3724 OGs
rlog_TPM <- DESeq2::rlog(merged_TPM.filt %>% round(0))
log2_TPM <- log2(merged_TPM.filt+1)
sqrt_TPM <- sqrt(merged_TPM.filt)
tpm10_TPM <- (merged_TPM.filt > 10) * 1
# combined_metatable$Stage <- factor(combined_metatable$Stage, levels = unique(combined_metatable$Stage))
combined_metatable$Stage <- factor(combined_metatable$Stage, levels = unique(sample_info_fd_embryo$Stage))
myPCAfunction <- function(pcDat, data, x, y){
OUT <-
ggplot2::autoplot(
pcDat,
data = data,
# frame.colour = "Species" & "Stage",
colour = "Stage",
shape = "Species",
x = x,
y = y,
size = 5,
# frame = TRUE,
alpha = 0.5) +
ggplot2::scale_color_brewer(palette="Dark2")
return(OUT)
}
pcDat <- prcomp(t(rlog_TPM), scale = TRUE)
# pcDat <- prcomp(t(rlog_TPM))
PC1_2 <-
myPCAfunction(pcDat, data = combined_metatable, x = 1, y = 2) +
labs(title = "PC1 and PC2") +
theme_grey()
PC1_3 <-
myPCAfunction(pcDat, data = combined_metatable, x = 1, y = 3) +
labs(title = "PC1 and PC3") +
theme_grey()
rlog_plot <- ggpubr::ggarrange(PC1_2, PC1_3, common.legend = T, ncol = 1, nrow = 2)
ggpubr::annotate_figure(rlog_plot, top = ggpubr::text_grob("rlog",
face = "bold", size = 14))
pcDat <- prcomp(t(log2_TPM), scale = TRUE)
PC1_2 <- myPCAfunction(pcDat, data = combined_metatable, x = 1, y = 2) +
labs(title = "PC1 and PC2") +
theme_grey()
PC1_3 <- myPCAfunction(pcDat, data = combined_metatable, x = 1, y = 3) +
labs(title = "PC1 and PC3") +
theme_grey()
log2_plot <- ggpubr::ggarrange(PC1_2, PC1_3, common.legend = T)
ggpubr::annotate_figure(log2_plot, top = ggpubr::text_grob("log2",
face = "bold", size = 14))
pcDat <- prcomp(t(sqrt_TPM), scale = TRUE)
# pcDat <- prcomp(t(rlog_TPM))
PC1_2 <- myPCAfunction(pcDat, data = combined_metatable, x = 1, y = 2) +
labs(title = "PC1 and PC2") +
theme_grey()
PC1_3 <- myPCAfunction(pcDat, data = combined_metatable, x = 1, y = 3) +
labs(title = "PC1 and PC3") +
theme_grey()
sqrt_plot <- ggpubr::ggarrange(PC1_2, PC1_3, common.legend = T)
ggpubr::annotate_figure(sqrt_plot, top = ggpubr::text_grob("sqrt",
face = "bold", size = 14))
pcDat <- prcomp(t(tpm10_TPM)) # too many zeros to do a scale = TRUE
PC1_2 <- myPCAfunction(pcDat, data = combined_metatable, x = 1, y = 2) +
labs(title = "PC1 and PC2") +
theme_grey()
PC1_3 <- myPCAfunction(pcDat, data = combined_metatable, x = 1, y = 3) +
labs(title = "PC1 and PC3") +
theme_grey()
rlog_plot <- ggpubr::ggarrange(PC1_2, PC1_3, common.legend = T)
ggpubr::annotate_figure(rlog_plot, top = ggpubr::text_grob("tpm10 threshold",
face = "bold", size = 14))
Here, we use distance metrics to quantify how distant the stages are from each other across Fucus species and maybe even in Ectocarpus.
library(philentropy)
ncol_Fs <- grep( "Fs" , colnames( log2_TPM ) )
ncol_Fd <- grep( "Fd" , colnames( log2_TPM ) )
log2_TPM_renamed <- log2_TPM
base::colnames(log2_TPM_renamed) <- combined_metatable$LibLab
Linear relationship
M = cor(x = log2_TPM_renamed[,ncol_Fs],
y = log2_TPM_renamed[,ncol_Fd],
method = "pearson")
M_melt <- reshape2::melt(M)
tmp1 <- sample_info_fd_embryo %>%
dplyr::select(Var2 = LibLab, Stage) %>%
full_join(M_melt, multiple = "all") %>%
dplyr::rename(Fd = Stage)
tmp2 <- sample_info_fs_embryo %>%
dplyr::select(Var1 = LibLab, Stage) %>%
full_join(tmp1, multiple = "all") %>%
dplyr::rename(Fs = Stage)
tmp2$Fs <- factor(tmp2$Fs, levels = unique(tmp2$Fs))
tmp2$Fd <- factor(tmp2$Fd, levels = unique(tmp2$Fd))
tmp2 %>% group_by(Fs, Fd) %>% summarise(median_corr = median(value)) %>%
ggplot(aes(x = Fs, y = Fd, fill = median_corr)) +
geom_tile() +
geom_text(aes(label = paste0(round(median_corr,3))), color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient(low = "white", high = "#DC0000FF") +
labs(x = "Fucus serratus samples",
y = "Fucus distichus samples",
title = "Pearson correlation (log2)",
fill = "median(correlation)")
Monotonic relationship
log2_TPM_renamed <- log2_TPM
base::colnames(log2_TPM_renamed) <- combined_metatable$LibLab
M = cor(x = log2_TPM_renamed[,ncol_Fs],
y = log2_TPM_renamed[,ncol_Fd],
method = "spearman")
M_melt <- reshape2::melt(M)
tmp1 <- sample_info_fd_embryo %>%
dplyr::select(Var2 = LibLab, Stage) %>%
full_join(M_melt, multiple = "all") %>%
dplyr::rename(Fd = Stage)
tmp2 <- sample_info_fs_embryo %>%
dplyr::select(Var1 = LibLab, Stage) %>%
full_join(tmp1, multiple = "all") %>%
dplyr::rename(Fs = Stage)
tmp2$Fs <- factor(tmp2$Fs, levels = unique(tmp2$Fs))
tmp2$Fd <- factor(tmp2$Fd, levels = unique(tmp2$Fd))
tmp2 %>% group_by(Fs, Fd) %>% summarise(median_corr = median(value)) %>%
ggplot(aes(x = Fs, y = Fd, fill = median_corr)) +
geom_tile() +
geom_text(aes(label = paste0(round(median_corr,3))), color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient(low = "white", high = "#DC0000FF") +
labs(x = "Fucus serratus samples",
y = "Fucus distichus samples",
title = "Spearman correlation (log2)",
fill = "median(correlation)")
Manhattan distance is a distance metric used to measure the distance between two points in a grid-like system, such as a city block or a chessboard. It is calculated as the sum of the absolute differences between the coordinates of the two points. The Manhattan distance is also known as the L1 norm or taxicab distance.
It is L_{1} norm unlike Euclidean L_{2} norm.
Because it uses absolute values instead of exponentiation and rooting, it is said to be more robust to outliers compared to the euclidean distance. Additionally, the modulus is much faster to compute than exponentiation. It is a special case of the Minkowski distance.
DM <- philentropy::distance(t(log2_TPM_renamed), use.row.names = TRUE, method = "manhattan")
DM2 <- DM[ncol_Fs, ncol_Fd]
M_melt <- reshape2::melt(DM2)
tmp1 <- sample_info_fd_embryo %>%
dplyr::select(Var2 = LibLab, Stage) %>%
full_join(M_melt, multiple = "all") %>%
dplyr::rename(Fd = Stage)
tmp2 <- sample_info_fs_embryo %>%
dplyr::select(Var1 = LibLab, Stage) %>%
full_join(tmp1, multiple = "all") %>%
dplyr::rename(Fs = Stage)
tmp2$Fs <- factor(tmp2$Fs, levels = unique(tmp2$Fs))
tmp2$Fd <- factor(tmp2$Fd, levels = unique(tmp2$Fd))
tmp2 %>% group_by(Fs, Fd) %>% summarise(median_corr = median(value)) %>%
ggplot(aes(x = Fs, y = Fd, fill = median_corr)) +
geom_tile() +
geom_text(aes(label = paste0(round(median_corr,0))), color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient(low = "white", high = "#3C5488FF") +
labs(x = "Fucus serratus samples",
y = "Fucus distichus samples",
title = "Manhattan distance (log2)",
fill = "median(distance)")
Jensen-Shannon distance is a statistical distance metric that measures the similarity between two probability distributions. It is often used in data analysis and machine learning for clustering, classification, and anomaly detection tasks.
mat_normalized <- apply(log2_TPM_renamed, 2, function(x) x/sum(x)) %>% as.data.frame()
# colSums(mat_normalized)
DM <- philentropy::distance(t(mat_normalized), use.row.names = TRUE, method = "jensen-shannon")
DM2 <- DM[ncol_Fs, ncol_Fd]
DM3 <- sqrt(DM2)
M_melt <- reshape2::melt(DM3)
tmp1 <- sample_info_fd_embryo %>%
dplyr::select(Var2 = LibLab, Stage) %>%
full_join(M_melt, multiple = "all") %>%
dplyr::rename(Fd = Stage)
tmp2 <- sample_info_fs_embryo %>%
dplyr::select(Var1 = LibLab, Stage) %>%
full_join(tmp1, multiple = "all") %>%
dplyr::rename(Fs = Stage)
tmp2$Fs <- factor(tmp2$Fs, levels = unique(tmp2$Fs))
tmp2$Fd <- factor(tmp2$Fd, levels = unique(tmp2$Fd))
tmp2 %>% group_by(Fs, Fd) %>% summarise(median_corr = median(value)) %>%
ggplot(aes(x = Fs, y = Fd, fill = median_corr)) +
geom_tile() +
geom_text(aes(label = paste0(round(median_corr,3))), color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient(low = "white", high = "#3C5488FF") +
labs(x = "Fucus serratus samples",
y = "Fucus distichus samples",
title = "JSD metric (norm. log2)",
fill = "median(distance)")
I am using rlog data here because it reduces the distance since there is higher correlation between the low-count genes. It should be noted though that the transformation here isn’t monotonic.
rlog_TPM_renamed <- rlog_TPM
rlog_TPM_renamed <- rlog_TPM
base::colnames(rlog_TPM_renamed) <- combined_metatable$LibLab
base::colnames(rlog_TPM_renamed) <- combined_metatable$LibLab
M = cor(x = rlog_TPM_renamed[,ncol_Fs],
y = rlog_TPM_renamed[,ncol_Fd],
method = "pearson")
M_melt <- reshape2::melt(M)
tmp1 <- sample_info_fd_embryo %>%
dplyr::select(Var2 = LibLab, Stage) %>%
full_join(M_melt, multiple = "all") %>%
dplyr::rename(Fd = Stage)
tmp2 <- sample_info_fs_embryo %>%
dplyr::select(Var1 = LibLab, Stage) %>%
full_join(tmp1, multiple = "all") %>%
dplyr::rename(Fs = Stage)
tmp2$Fs <- factor(tmp2$Fs, levels = unique(tmp2$Fs))
tmp2$Fd <- factor(tmp2$Fd, levels = unique(tmp2$Fd))
tmp2 %>% group_by(Fs, Fd) %>% summarise(median_corr = median(value)) %>%
ggplot(aes(x = Fs, y = Fd, fill = median_corr)) +
geom_tile() +
geom_text(aes(label = paste0(round(median_corr,3))), color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient(low = "white", high = "#DC0000FF") +
labs(x = "Fucus serratus samples",
y = "Fucus distichus samples",
title = "Pearson correlation (rlog)",
fill = "median(correlation)")
# save plot
# ggplot2::ggsave(filename = "Fucus_Pearson.pdf", device = "pdf", width = 4.5, height = 3)
rlog_TPM_renamed <- rlog_TPM
base::colnames(rlog_TPM_renamed) <- combined_metatable$LibLab
M = cor(x = rlog_TPM_renamed[,ncol_Fs],
y = rlog_TPM_renamed[,ncol_Fd],
method = "spearman")
M_melt <- reshape2::melt(M)
tmp1 <- sample_info_fd_embryo %>%
dplyr::select(Var2 = LibLab, Stage) %>%
full_join(M_melt, multiple = "all") %>%
dplyr::rename(Fd = Stage)
tmp2 <- sample_info_fs_embryo %>%
dplyr::select(Var1 = LibLab, Stage) %>%
full_join(tmp1, multiple = "all") %>%
dplyr::rename(Fs = Stage)
tmp2$Fs <- factor(tmp2$Fs, levels = unique(tmp2$Fs))
tmp2$Fd <- factor(tmp2$Fd, levels = unique(tmp2$Fd))
tmp2 %>% group_by(Fs, Fd) %>% summarise(median_corr = median(value)) %>%
ggplot(aes(x = Fs, y = Fd, fill = median_corr)) +
geom_tile() +
geom_text(aes(label = paste0(round(median_corr,3))), color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient(low = "white", high = "#DC0000FF") +
labs(x = "Fucus serratus samples",
y = "Fucus distichus samples",
title = "Spearman correlation (rlog)",
fill = "median(correlation)")
# save plot
# ggplot2::ggsave(filename = "Fucus_Spearman.pdf", device = "pdf", width = 4.5, height = 3)
DM <- philentropy::distance(t(rlog_TPM_renamed), use.row.names = TRUE, method = "manhattan")
DM2 <- DM[ncol_Fs, ncol_Fd]
M_melt <- reshape2::melt(DM2)
tmp1 <- sample_info_fd_embryo %>%
dplyr::select(Var2 = LibLab, Stage) %>%
full_join(M_melt, multiple = "all") %>%
dplyr::rename(Fd = Stage)
tmp2 <- sample_info_fs_embryo %>%
dplyr::select(Var1 = LibLab, Stage) %>%
full_join(tmp1, multiple = "all") %>%
dplyr::rename(Fs = Stage)
tmp2$Fs <- factor(tmp2$Fs, levels = unique(tmp2$Fs))
tmp2$Fd <- factor(tmp2$Fd, levels = unique(tmp2$Fd))
tmp2 %>% group_by(Fs, Fd) %>% summarise(median_corr = median(value)) %>%
ggplot(aes(x = Fs, y = Fd, fill = median_corr)) +
geom_tile() +
geom_text(aes(label = paste0(round(median_corr,0))), color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient(low = "white", high = "#3C5488FF") +
labs(x = "Fucus serratus samples",
y = "Fucus distichus samples",
title = "Manhattan distance (rlog)",
fill = "median(distance)")
# save plot
# ggplot2::ggsave(filename = "Fucus_Manhattan.pdf", device = "pdf", width = 4.5, height = 3)
DM <- philentropy::distance(t(rlog_TPM_renamed), use.row.names = TRUE, method = "euclidean")
DM2 <- DM[ncol_Fs, ncol_Fd]
M_melt <- reshape2::melt(DM2)
tmp1 <- sample_info_fd_embryo %>%
dplyr::select(Var2 = LibLab, Stage) %>%
full_join(M_melt, multiple = "all") %>%
dplyr::rename(Fd = Stage)
tmp2 <- sample_info_fs_embryo %>%
dplyr::select(Var1 = LibLab, Stage) %>%
full_join(tmp1, multiple = "all") %>%
dplyr::rename(Fs = Stage)
tmp2$Fs <- factor(tmp2$Fs, levels = unique(tmp2$Fs))
tmp2$Fd <- factor(tmp2$Fd, levels = unique(tmp2$Fd))
tmp2 %>% group_by(Fs, Fd) %>% summarise(median_corr = median(value)) %>%
ggplot(aes(x = Fs, y = Fd, fill = median_corr)) +
geom_tile() +
geom_text(aes(label = paste0(round(median_corr,1))), color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient(low = "white", high = "#3C5488FF") +
labs(x = "Fucus serratus samples",
y = "Fucus distichus samples",
title = "Euclidean distance (rlog)",
fill = "median(distance)")
mat_normalized <- apply(rlog_TPM_renamed, 2, function(x) x/sum(x)) %>% as.data.frame()
# colSums(mat_normalized)
DM <- philentropy::distance(t(mat_normalized), use.row.names = TRUE, method = "jensen-shannon")
DM2 <- DM[ncol_Fs, ncol_Fd]
DM3 <- sqrt(DM2)
M_melt <- reshape2::melt(DM3)
tmp1 <- sample_info_fd_embryo %>%
dplyr::select(Var2 = LibLab, Stage) %>%
full_join(M_melt, multiple = "all") %>%
dplyr::rename(Fd = Stage)
tmp2 <- sample_info_fs_embryo %>%
dplyr::select(Var1 = LibLab, Stage) %>%
full_join(tmp1, multiple = "all") %>%
dplyr::rename(Fs = Stage)
tmp2$Fs <- factor(tmp2$Fs, levels = unique(tmp2$Fs))
tmp2$Fd <- factor(tmp2$Fd, levels = unique(tmp2$Fd))
tmp2 %>% group_by(Fs, Fd) %>% summarise(median_corr = median(value)) %>%
ggplot(aes(x = Fs, y = Fd, fill = median_corr)) +
geom_tile() +
geom_text(aes(label = paste0(round(median_corr,3))), color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient(low = "white", high = "#3C5488FF") +
labs(x = "Fucus serratus samples",
y = "Fucus distichus samples",
title = "JSD metric (norm. rlog)",
fill = "median(distance)")
# save plot
# ggplot2::ggsave(filename = "Fucus_JSD.pdf", device = "pdf", width = 4.5, height = 3)
After a question in the TAC about whether the same genes drive the TAI, I decided to use pMatrix and check which genes drive the high (young) TAI in the gametes.
library(tidyverse)
library(myTAI)
library(see)
Non-transformed
Ec_PES_32m <-
readr::read_csv(file = "data/Ec_PES_32m.csv", show_col_types = FALSE)
Ec_PES_25f <-
readr::read_csv(file = "data/Ec_PES_25f.csv", show_col_types = FALSE)
sqrt-tranformed
Ec_PES_32m.sqrt <-
readr::read_csv(file = "data/Ec_PES_32m.sqrt.csv", show_col_types = FALSE)
Ec_PES_25f.sqrt <-
readr::read_csv(file = "data/Ec_PES_25f.sqrt.csv", show_col_types = FALSE)
log2-tranformed
Ec_PES_32m.log2 <-
readr::read_csv(file = "data/Ec_PES_32m.log2.csv", show_col_types = FALSE)
Ec_PES_25f.log2 <-
readr::read_csv(file = "data/Ec_PES_25f.log2.csv", show_col_types = FALSE)
rank-tranformed
Ec_PES_32m.rank <-
readr::read_csv(file = "data/Ec_PES_32m.rank.csv", show_col_types = FALSE)
Ec_PES_25f.rank <-
readr::read_csv(file = "data/Ec_PES_25f.rank.csv", show_col_types = FALSE)
rlog-tranformed
Ec_PES_32m.rlog <-
readr::read_csv(file = "data/Ec_PES_32m.rlog.csv", show_col_types = FALSE)
Ec_PES_25f.rlog <-
readr::read_csv(file = "data/Ec_PES_25f.rlog.csv", show_col_types = FALSE)
unicellular_pMat <- function(PES, unicell_names = c("meiospore", "gamete", "mitospore"), dropzero = F, ordered_stages, top_genes = F, n_genes = 500, stage_select = "gamete"){
if(!is.data.frame(PES))
stop("PES is not a dataframe")
stage_select <- rlang::sym(stage_select)
PES_filt <- PES %>%
dplyr::select(1, 2, all_of(unicell_names))
if(dropzero){
PES_filt <- PES_filt %>%
dplyr::filter(rowSums(!select(., all_of(unicell_names))) == 0)
}
PES_filt <- PES_filt %>%
myTAI::pMatrix() %>%
tibble::as_tibble(rownames = "GeneID")
# in case one wants to select the top N genes.
if(top_genes){
PES_filt <- PES_filt %>%
dplyr::slice_max({{stage_select}}, n = n_genes)
}
PES_filt <- PES_filt %>%
tidyr::pivot_longer(!GeneID, names_to = "Stage", values_to = "pTAI")
# make factor
PES_filt$Stage <- base::factor(PES_filt$Stage, ordered_stages)
return(PES_filt)
}
test_data <- unicellular_pMat(Ec_PES_32m.sqrt) %>% dplyr::filter(Stage == "meiospore")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
plot(test_data2, main = "Ec32m meiospore sqrt", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
test_data <- unicellular_pMat(Ec_PES_25f.sqrt) %>% dplyr::filter(Stage == "meiospore")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
plot(test_data2, main = "Ec25f meiospore sqrt", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
It seems like choosing the top 500 genes is a good idea based on the elbow heuristics. Red = top 100; green = top 500; blue = top 1000
here are better plots
# Meiospore
test_data <- unicellular_pMat(Ec_PES_32m.sqrt) %>% dplyr::filter(Stage == "meiospore")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec32m_meiospore_sqrt.pdf",
width = 3.3,
height = 3)
plot(test_data2, main = "Ec32m meiospore sqrt", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_25f.sqrt) %>% dplyr::filter(Stage == "meiospore")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec25f_meiospore_sqrt.pdf",
width = 3.3,
height = 3)
plot(test_data2, main = "Ec25f meiospore sqrt", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
# Gamete
test_data <- unicellular_pMat(Ec_PES_32m.sqrt) %>% dplyr::filter(Stage == "gamete")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec32m_gamete_sqrt.pdf",
width = 3.3,
height = 3)
plot(test_data2, main = "Ec32m gamete sqrt", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_25f.sqrt) %>% dplyr::filter(Stage == "gamete")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec25f_gamete_sqrt.pdf",
width = 3.3,
height = 3)
plot(test_data2, main = "Ec25f gamete sqrt", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
# Mitospore
test_data <- unicellular_pMat(Ec_PES_32m.sqrt) %>% dplyr::filter(Stage == "mitospore")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec32m_mitospore_sqrt.pdf",
width = 3.3,
height = 3)
plot(test_data2, main = "Ec32m mitospore sqrt", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_25f.sqrt) %>% dplyr::filter(Stage == "mitospore")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec25f_mitospore_sqrt.pdf",
width = 3.3,
height = 3)
plot(test_data2, main = "Ec25f mitospore sqrt", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
# male
test_data <- unicellular_pMat(Ec_PES_32m.sqrt, unicell_names = "immGA") %>% dplyr::filter(Stage == "immGA")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec32m_immGA_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec32m immGA", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_32m.sqrt, unicell_names = "matGA") %>% dplyr::filter(Stage == "matGA")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec32m_matGA_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec32m matGA", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_32m.sqrt, unicell_names = "oldGA") %>% dplyr::filter(Stage == "oldGA")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec32m_oldGA_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec32m oldGA", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_32m.sqrt, unicell_names = "earlyPSP") %>% dplyr::filter(Stage == "earlyPSP")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec32m_earlyPSP_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec32m earlyPSP", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_32m.sqrt, unicell_names = "immPSP") %>% dplyr::filter(Stage == "immPSP")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec32m_immPSP_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec32m immPSP", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_32m.sqrt, unicell_names = "matPSP") %>% dplyr::filter(Stage == "matPSP")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec32m_matPSP_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec32m matPSP", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
# female
test_data <- unicellular_pMat(Ec_PES_25f.sqrt, unicell_names = "immGA") %>% dplyr::filter(Stage == "immGA")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec25f_immGA_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec25f immGA", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_25f.sqrt, unicell_names = "matGA") %>% dplyr::filter(Stage == "matGA")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec25f_matGA_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec25f matGA", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_25f.sqrt, unicell_names = "oldGA") %>% dplyr::filter(Stage == "oldGA")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec25f_oldGA_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec25f oldGA", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_25f.sqrt, unicell_names = "earlyPSP") %>% dplyr::filter(Stage == "earlyPSP")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec25f_earlyPSP_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec25f earlyPSP", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_25f.sqrt, unicell_names = "immPSP") %>% dplyr::filter(Stage == "immPSP")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec25f_immPSP_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec25f immPSP", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
test_data <- unicellular_pMat(Ec_PES_25f.sqrt, unicell_names = "matPSP") %>% dplyr::filter(Stage == "matPSP")
test_data2 <- sort(test_data$pTAI, decreasing = TRUE)
pdf(file = "pTAI_plots/Ec25f_matPSP_sqrt.pdf",
width = 2.5,
height = 3)
plot(test_data2, main = "Ec25f matPSP", ylab = "pTAI", xlab = "sorted genes"); abline(v = 500, col = "green"); # abline(v = 100, col = "red"); abline(v = 1000, col = "blue")
dev.off()
## quartz_off_screen
## 2
unicellular_pMat_plot <- function(PES,
unicell_names = c("meiospore", "gamete", "mitospore"),
n_genes = 500,
stage_select = "gamete",
transformation = "log2",
alpha = 0,
dropzero = F,
ordered_stages) {
top_pTAI <- PES %>%
unicellular_pMat(dropzero = dropzero, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = stage_select, n_genes = n_genes)
PES_filt <- unicellular_pMat(PES = PES, unicell_names = unicell_names, ordered_stages = ordered_stages)
# Rest of your function logic
# ...
# transformation
f <- match.fun(transformation)
plot_re <- PES_filt %>%
ggplot2::ggplot(aes(y = f(pTAI + alpha), x = Stage, fill = Stage)) +
see::geom_violinhalf(trim = TRUE, width=1.2, alpha = 0.8) +
ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white")) +
ggplot2::geom_line(
data = top_pTAI,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.2,
colour = "#3C5488FF") +
ggplot2::geom_point(
data = top_pTAI,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.3,
size = 0.7,
colour = "#3C5488FF") +
ggplot2::ylab(paste0(transformation, "(pTAI)")) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position="none") +
ggplot2::coord_flip()
return(plot_re)
}
unicellular_pMat_plot_all <- function(PES,
unicell_names = c("meiospore", "gamete", "mitospore"),
n_genes = 500,
transformation = "log2",
alpha = 0,
dropzero = F,
ordered_stages) {
top_pTAI_1 <- PES %>%
unicellular_pMat(dropzero = dropzero, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = unicell_names[[1]], n_genes = n_genes)
top_pTAI_2 <- PES %>%
unicellular_pMat(dropzero = dropzero, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = unicell_names[[2]], n_genes = n_genes)
top_pTAI_3 <- PES %>%
unicellular_pMat(dropzero = dropzero, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = unicell_names[[3]], n_genes = n_genes)
top_pTAI_all <- top_pTAI_1 %>%
inner_join(top_pTAI_2) %>%
inner_join(top_pTAI_3)
PES_filt <- unicellular_pMat(PES = PES, unicell_names = unicell_names, ordered_stages = ordered_stages)
# Rest of your function logic
# ...
# transformation
f <- match.fun(transformation)
plot_re <- PES_filt %>%
ggplot2::ggplot(aes(y = f(pTAI + alpha), x = Stage, fill = Stage)) +
see::geom_violinhalf(trim = TRUE, width=1.2, alpha = 0.5) +
ggplot2::scale_fill_manual(values = c("#3C5488FF", "#E64B35FF", "#00A087FF")) +
ggplot2::geom_line(
data = top_pTAI_1,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.1,
size = 0.3,
colour = "#3C5488FF") +
ggplot2::geom_point(
data = top_pTAI_1,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.1,
size = 0.7,
colour = "#3C5488FF") +
ggplot2::geom_line(
data = top_pTAI_2,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.1,
size = 0.3,
colour = "#E64B35FF") +
ggplot2::geom_point(
data = top_pTAI_3,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.1,
size = 0.7,
colour = "#E64B35FF") +
ggplot2::geom_line(
data = top_pTAI_3,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.1,
size = 0.3,
colour = "#00A087FF") +
ggplot2::geom_point(
data = top_pTAI_3,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.1,
size = 0.7,
colour = "#00A087FF") +
ggplot2::geom_line(
data = top_pTAI_all,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.5,
colour = "black") +
ggplot2::ylab(paste0(transformation, "(pTAI)")) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position="none") +
ggplot2::coord_flip()
return(plot_re)
}
ordered_stages <- c("gamete", "meiospore", "mitospore")
unicellular_pMat_plot_all(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) +
ggplot2::labs(title = "Ec32m",
subtitle = "sqrt(TPM)")
ggplot2::ggsave(filename = "pTAI_plots/Ec32m_pTAI_unicell.pdf", device = "pdf", width = 5, height = 3)
unicellular_pMat_plot_all(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) +
ggplot2::labs(title = "Ec25f",
subtitle = "sqrt(TPM)")
ggplot2::ggsave(filename = "pTAI_plots/Ec25f_pTAI_unicell.pdf", device = "pdf", width = 5, height = 3)
During multicellular stages.
multicellular_pMat_plot <- function(PES,
multicell_names = c("immGA", "matGA", "oldGA", "earlyPSP", "immPSP", "matPSP"),
n_genes = 500,
stage_select = "immGA",
transformation = "log2",
alpha = 0,
dropzero = F,
ordered_stages) {
top_pTAI <- PES %>%
unicellular_pMat(dropzero = dropzero, unicell_names = multicell_names, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = stage_select, n_genes = n_genes)
PES_filt <- unicellular_pMat(PES = PES, unicell_names = multicell_names, ordered_stages = ordered_stages)
# transformation
f <- match.fun(transformation)
plot_re <- PES_filt %>%
ggplot2::ggplot(aes(y = f(pTAI + alpha), x = Stage, fill = Stage)) +
see::geom_violinhalf(trim = TRUE, width=1.2, alpha = 0.8) +
ggplot2::geom_line(
data = top_pTAI,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.2,
colour = "#3C5488FF") +
ggplot2::geom_point(
data = top_pTAI,
aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
alpha = 0.3,
size = 0.7,
colour = "#3C5488FF") +
ggplot2::ylab(paste0(transformation, "(pTAI)")) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position="none") +
ggplot2::coord_flip()
return(plot_re)
}
pTAI)Based on the top 500 contributors to TAI, we can assess whether
certain GO categories are enriched. We use Fisher’s exact
test statistics with the parent-child algorithm as
implemented in TopGO. [Note, the enrichment analysis per se
was not possible but the GO terms on each gene was analysed]
library(tximport)
library(myTAI)
library(tidyverse)
First, I filtered removed the last * and then any
internal * from the protein sequences, so that we can run
interproscan.
sed 's/\*$//' PUBLIC_Ectocarpus-sp7_proteins.fa | sed 's/\*//g' > Ectocarpus-sp7.filt.faa
mkdir Ec_ips
~/bin/interproscan-5.61-93.0/interproscan.sh \
-i Ectocarpus-sp7.filt.faa \
-d Ec_ips \
-t p \
--goterms \
-cpu 20 \
-dp
Ec_PES_32m.sqrt_unicell <- readr::read_csv("data/pTAI/Ec_PES_32m.sqrt_unicell.csv")
## Rows: 115 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.sqrt_unicell <- readr::read_csv("data/pTAI/Ec_PES_25f.sqrt_unicell.csv")
## Rows: 98 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_32m.sqrt_multicell <- readr::read_csv("data/pTAI/Ec_PES_32m.sqrt_multicell.csv")
## Rows: 198 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.sqrt_multicell <- readr::read_csv("data/pTAI/Ec_PES_25f.sqrt_multicell.csv")
## Rows: 201 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# # Non-transformed
Ec_PES_32m <-
readr::read_csv(file = "data/Ec_PES_32m.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f <-
readr::read_csv(file = "data/Ec_PES_25f.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# sqrt-tranformed
Ec_PES_32m.sqrt <-
readr::read_csv(file = "data/Ec_PES_32m.sqrt.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.sqrt <-
readr::read_csv(file = "data/Ec_PES_25f.sqrt.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# similar to unicellular_pMat() from 2_4_unicell_pTAI.Rmd
process_pMat <- function(PES, dropzero = F, ordered_stages, top_genes = F, n_genes = 500, stage_select = "gamete"){
if(!is.data.frame(PES))
stop("PES is not a dataframe")
stage_select <- rlang::sym(stage_select)
PES_filt <- PES
PES_filt <- PES_filt %>%
myTAI::pMatrix() %>%
tibble::as_tibble(rownames = "GeneID")
# in case one wants to select the top N genes.
if(top_genes){
PES_filt <- PES_filt %>%
dplyr::slice_max({{stage_select}}, n = n_genes)
}
PES_filt <- PES_filt %>%
tidyr::pivot_longer(!GeneID, names_to = "Stage", values_to = "pTAI")
# make factor
PES_filt$Stage <- base::factor(PES_filt$Stage, ordered_stages)
return(PES_filt)
}
GO_Ec <- read_delim("data/Interproscan/Ec_ips/Ectocarpus-sp7.filt.processed.faa.tsv",
delim = "\t", col_select = c(1,14)) %>%
drop_na() %>%
filter(GO_term != "-") %>%
separate_rows(GO_term, sep = "\\|") %>%
# dplyr::mutate(GeneID = str_remove(GeneID, "\\.[^.]+$")) %>%
arrange(GeneID) %>%
distinct()
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 224135 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): GeneID, GO_term
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
IPR_Ec <- read_delim("data/Interproscan/Ec_ips/Ectocarpus-sp7.filt.processed.faa.tsv",
delim = "\t") %>%
dplyr::select(GeneID, Analysis, Signature_description, GO_term) %>%
# dplyr::mutate(GeneID = str_remove(GeneID, "\\.[^.]+$")) %>%
arrange(GeneID) %>%
distinct()
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 224135 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (10): GeneID, MD5_digest, Analysis, Signature_accession, Signature_descr...
## dbl (3): Length, Start_location, Stop_location
## lgl (1): Status
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
length(unique(GO_Ec$GO_term))
length(unique(GO_Ec$GeneID))
There are 18,289 GO terms (whereof 2070 are unique) attached to 9881 genes (including isoforms) in Ectocarpus.
# sqrt males
pTAI500 <- Ec_PES_32m.sqrt_unicell %>%
dplyr::mutate(pTAI500 = 1)
GO_fisher_input <- Ec_PES_32m.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(GO_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(GO_present = case_when(
!is.na(GO_term) ~ "GO_present",
TRUE ~ "GO_notpresent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ "top TAI contributors",
TRUE ~ "not top TAI contributors"
)) %>%
dplyr::select(-GO_term) %>%
dplyr::distinct()
fisher.test(GO_fisher_input$pTAI500, GO_fisher_input$GO_present, alternative = "less")
##
## Fisher's Exact Test for Count Data
##
## data: GO_fisher_input$pTAI500 and GO_fisher_input$GO_present
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is less than 1
## 95 percent confidence interval:
## 0.000000 0.171571
## sample estimates:
## odds ratio
## 0.100071
# sqrt females
pTAI500 <- Ec_PES_25f.sqrt_unicell %>%
dplyr::mutate(pTAI500 = 1)
GO_fisher_input <- Ec_PES_25f.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(GO_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(GO_present = case_when(
!is.na(GO_term) ~ "GO_present",
TRUE ~ "GO_notpresent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ "top TAI contributors",
TRUE ~ "not top TAI contributors"
)) %>%
dplyr::select(-GO_term) %>%
dplyr::distinct()
fisher.test(GO_fisher_input$pTAI500, GO_fisher_input$GO_present, alternative = "less")
##
## Fisher's Exact Test for Count Data
##
## data: GO_fisher_input$pTAI500 and GO_fisher_input$GO_present
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is less than 1
## 95 percent confidence interval:
## 0.0000000 0.1259462
## sample estimates:
## odds ratio
## 0.06177139
There is significant enrichment for genes without GO annotations.
We now plot the results.
# sqrt males
pTAI500 <- Ec_PES_32m.sqrt_unicell %>%
dplyr::mutate(pTAI500 = 1)
GO_plot_input <- Ec_PES_32m.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(GO_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(GO_present = case_when(
!is.na(GO_term) ~ "present",
TRUE ~ "absent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ T,
TRUE ~ F
)) %>%
dplyr::select(-GO_term) %>%
dplyr::distinct()
GO_plot_input %>%
dplyr::group_by(pTAI500, GO_present) %>%
dplyr::summarise(count = n()) %>%
dplyr::group_by(pTAI500) %>%
dplyr::mutate(perc = count/sum(count)) %>%
ggplot2::ggplot(aes(x = pTAI500, y = perc*100, fill = GO_present)) +
ggplot2::geom_col(colour = "black", width = 0.4) +
ggplot2::scale_fill_manual(values = c("#E64B35FF", "white")) +
ggplot2::theme_classic() +
ggplot2::labs(title = "GO term presence",
subtitle = "Ectocarpus male sqrt TPM",
fill = "GO term",
x = "Top TAI contributors?",
y = "Percentage (%)")
## `summarise()` has grouped output by 'pTAI500'. You can override using the
## `.groups` argument.
# ggplot2::ggsave(filename = "pTAI_plots/Ec32m_uni_GO_presence.pdf",
# device = "pdf", width = 3, height = 3)
# sqrt females
pTAI500 <- Ec_PES_25f.sqrt_unicell %>%
dplyr::mutate(pTAI500 = 1)
GO_plot_input <- Ec_PES_25f.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(GO_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(GO_present = case_when(
!is.na(GO_term) ~ "present",
TRUE ~ "absent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ T,
TRUE ~ F
)) %>%
dplyr::select(-GO_term) %>%
dplyr::distinct()
GO_plot_input %>%
dplyr::group_by(pTAI500, GO_present) %>%
dplyr::summarise(count = n()) %>%
dplyr::group_by(pTAI500) %>%
dplyr::mutate(perc = count/sum(count)) %>%
ggplot2::ggplot(aes(x = pTAI500, y = perc*100, fill = GO_present)) +
ggplot2::geom_col(colour = "black", width = 0.4) +
ggplot2::scale_fill_manual(values = c("#E64B35FF", "white")) +
ggplot2::theme_classic() +
ggplot2::labs(title = "GO term presence",
subtitle = "Ectocarpus female sqrt TPM",
fill = "GO term",
x = "Top TAI contributors?",
y = "Percentage (%)")
## `summarise()` has grouped output by 'pTAI500'. You can override using the
## `.groups` argument.
# ggplot2::ggsave(filename = "pTAI_plots/Ec25f_uni_GO_presence.pdf",
# device = "pdf", width = 3, height = 3)
unicellular: 9.57% in males with GO terms
unicellular: 6.12% in females with GO terms
# sqrt males
pTAI500 <- Ec_PES_32m.sqrt_multicell %>%
dplyr::mutate(pTAI500 = 1)
GO_fisher_input <- Ec_PES_32m.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(GO_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(GO_present = case_when(
!is.na(GO_term) ~ "GO_present",
TRUE ~ "GO_notpresent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ "top TAI contributors",
TRUE ~ "not top TAI contributors"
)) %>%
dplyr::select(-GO_term) %>%
dplyr::distinct()
fisher.test(GO_fisher_input$pTAI500, GO_fisher_input$GO_present, alternative = "less")
##
## Fisher's Exact Test for Count Data
##
## data: GO_fisher_input$pTAI500 and GO_fisher_input$GO_present
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is less than 1
## 95 percent confidence interval:
## 0.0000000 0.2771922
## sample estimates:
## odds ratio
## 0.2018346
GO_plot_input <- Ec_PES_32m.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(GO_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(GO_present = case_when(
!is.na(GO_term) ~ "present",
TRUE ~ "absent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ T,
TRUE ~ F
)) %>%
dplyr::select(-GO_term) %>%
dplyr::distinct()
GO_plot_input %>%
dplyr::group_by(pTAI500, GO_present) %>%
dplyr::summarise(count = n()) %>%
dplyr::group_by(pTAI500) %>%
dplyr::mutate(perc = count/sum(count)) %>%
ggplot2::ggplot(aes(x = pTAI500, y = perc*100, fill = GO_present)) +
ggplot2::geom_col(colour = "black", width = 0.4) +
ggplot2::scale_fill_manual(values = c("#E64B35FF", "white")) +
ggplot2::theme_classic() +
ggplot2::labs(title = "GO term presence",
subtitle = "Ectocarpus male sqrt TPM",
fill = "GO term",
x = "Top TAI contributors?",
y = "Percentage (%)")
## `summarise()` has grouped output by 'pTAI500'. You can override using the
## `.groups` argument.
# ggplot2::ggsave(filename = "pTAI_plots/Ec32m_multi_GO_presence.pdf",
# device = "pdf", width = 3, height = 3)
# sqrt females
pTAI500 <- Ec_PES_25f.sqrt_multicell %>%
dplyr::mutate(pTAI500 = 1)
GO_fisher_input <- Ec_PES_25f.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(GO_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(GO_present = case_when(
!is.na(GO_term) ~ "GO_present",
TRUE ~ "GO_notpresent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ "top TAI contributors",
TRUE ~ "not top TAI contributors"
)) %>%
dplyr::select(-GO_term) %>%
dplyr::distinct()
fisher.test(GO_fisher_input$pTAI500, GO_fisher_input$GO_present, alternative = "less")
##
## Fisher's Exact Test for Count Data
##
## data: GO_fisher_input$pTAI500 and GO_fisher_input$GO_present
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is less than 1
## 95 percent confidence interval:
## 0.0000000 0.2980414
## sample estimates:
## odds ratio
## 0.2192499
GO_plot_input <- Ec_PES_25f.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(GO_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(GO_present = case_when(
!is.na(GO_term) ~ "present",
TRUE ~ "absent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ T,
TRUE ~ F
)) %>%
dplyr::select(-GO_term) %>%
dplyr::distinct()
GO_plot_input %>%
dplyr::group_by(pTAI500, GO_present) %>%
dplyr::summarise(count = n()) %>%
dplyr::group_by(pTAI500) %>%
dplyr::mutate(perc = count/sum(count)) %>%
ggplot2::ggplot(aes(x = pTAI500, y = perc*100, fill = GO_present)) +
ggplot2::geom_col(colour = "black", width = 0.4) +
ggplot2::scale_fill_manual(values = c("#E64B35FF", "white")) +
ggplot2::theme_classic() +
ggplot2::labs(title = "GO term presence",
subtitle = "Ectocarpus female sqrt TPM",
fill = "GO term",
x = "Top TAI contributors?",
y = "Percentage (%)")
## `summarise()` has grouped output by 'pTAI500'. You can override using the
## `.groups` argument.
# ggplot2::ggsave(filename = "pTAI_plots/Ec25f_multi_GO_presence.pdf",
# device = "pdf", width = 3, height = 3)
multicellular: 17.7% in males with GO terms
multicellular: 18.9% in females with GO terms
Genes contributing to TAI in multicellular stages tend to have GO terms.
# sqrt males
pTAI500 <- Ec_PES_32m.sqrt_unicell %>%
dplyr::mutate(pTAI500 = 1)
IPR_plot_input <- Ec_PES_32m.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(IPR_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(IPR_present = case_when(
!is.na(Analysis) ~ "present",
TRUE ~ "absent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ T,
TRUE ~ F
)) %>%
dplyr::select(-c(GO_term, Analysis, Signature_description)) %>%
dplyr::distinct()
IPR_plot_input %>%
dplyr::group_by(pTAI500, IPR_present) %>%
dplyr::summarise(count = n()) %>%
dplyr::group_by(pTAI500) %>%
dplyr::mutate(perc = count/sum(count)) %>%
ggplot2::ggplot(aes(x = pTAI500, y = perc*100, fill = IPR_present)) +
ggplot2::geom_col(colour = "black", width = 0.4) +
ggplot2::scale_fill_manual(values = c("#E64B35FF", "white")) +
ggplot2::theme_classic() +
ggplot2::labs(title = "Interproscan presence",
subtitle = "Ectocarpus male sqrt TPM",
fill = "IPS term",
x = "Top TAI contributors?",
y = "Percentage (%)")
## `summarise()` has grouped output by 'pTAI500'. You can override using the
## `.groups` argument.
# ggplot2::ggsave(filename = "pTAI_plots/Ec32m_uni_IPR_presence.pdf",
# device = "pdf", width = 3, height = 3)
# sqrt females
pTAI500 <- Ec_PES_25f.sqrt_unicell %>%
dplyr::mutate(pTAI500 = 1)
IPR_plot_input <- Ec_PES_25f.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(IPR_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(IPR_present = case_when(
!is.na(Analysis) ~ "present",
TRUE ~ "absent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ T,
TRUE ~ F
)) %>%
dplyr::select(-c(GO_term, Analysis, Signature_description)) %>%
dplyr::distinct()
IPR_plot_input %>%
dplyr::group_by(pTAI500, IPR_present) %>%
dplyr::summarise(count = n()) %>%
dplyr::group_by(pTAI500) %>%
dplyr::mutate(perc = count/sum(count)) %>%
ggplot2::ggplot(aes(x = pTAI500, y = perc*100, fill = IPR_present)) +
ggplot2::geom_col(colour = "black", width = 0.4) +
ggplot2::scale_fill_manual(values = c("#E64B35FF", "white")) +
ggplot2::theme_classic() +
ggplot2::labs(title = "Interproscan presence",
subtitle = "Ectocarpus female sqrt TPM",
fill = "IPS term",
x = "Top TAI contributors?",
y = "Percentage (%)")
## `summarise()` has grouped output by 'pTAI500'. You can override using the
## `.groups` argument.
# ggplot2::ggsave(filename = "pTAI_plots/Ec25f_uni_IPR_presence.pdf",
# device = "pdf", width = 3, height = 3)
unicellular: 57.4% in males with IPR
unicellular: 58.2% in females with IPR
# sqrt males
pTAI500 <- Ec_PES_32m.sqrt_multicell %>%
dplyr::mutate(pTAI500 = 1)
IPR_plot_input <- Ec_PES_32m.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(IPR_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(IPR_present = case_when(
!is.na(Analysis) ~ "present",
TRUE ~ "absent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ T,
TRUE ~ F
)) %>%
dplyr::select(-c(GO_term, Analysis, Signature_description)) %>%
dplyr::distinct()
IPR_plot_input %>%
dplyr::group_by(pTAI500, IPR_present) %>%
dplyr::summarise(count = n()) %>%
dplyr::group_by(pTAI500) %>%
dplyr::mutate(perc = count/sum(count)) %>%
ggplot2::ggplot(aes(x = pTAI500, y = perc*100, fill = IPR_present)) +
ggplot2::geom_col(colour = "black", width = 0.4) +
ggplot2::scale_fill_manual(values = c("#E64B35FF", "white")) +
ggplot2::theme_classic() +
ggplot2::labs(title = "Interproscan presence",
subtitle = "Ectocarpus male sqrt TPM",
fill = "IPS term",
x = "Top TAI contributors?",
y = "Percentage (%)")
## `summarise()` has grouped output by 'pTAI500'. You can override using the
## `.groups` argument.
# ggplot2::ggsave(filename = "pTAI_plots/Ec32m_multi_IPR_presence.pdf",
# device = "pdf", width = 3, height = 3)
# sqrt females
pTAI500 <- Ec_PES_25f.sqrt_multicell %>%
dplyr::mutate(pTAI500 = 1)
IPR_plot_input <- Ec_PES_25f.sqrt %>%
dplyr::select(GeneID) %>%
dplyr::left_join(IPR_Ec, by = join_by(GeneID)) %>%
dplyr::mutate(IPR_present = case_when(
!is.na(Analysis) ~ "present",
TRUE ~ "absent"
)) %>%
dplyr::left_join(pTAI500, by = join_by(GeneID)) %>%
dplyr::mutate(pTAI500 = case_when(
pTAI500 == 1 ~ T,
TRUE ~ F
)) %>%
dplyr::select(-c(GO_term, Analysis, Signature_description)) %>%
dplyr::distinct()
IPR_plot_input %>%
dplyr::group_by(pTAI500, IPR_present) %>%
dplyr::summarise(count = n()) %>%
dplyr::group_by(pTAI500) %>%
dplyr::mutate(perc = count/sum(count)) %>%
ggplot2::ggplot(aes(x = pTAI500, y = perc*100, fill = IPR_present)) +
ggplot2::geom_col(colour = "black", width = 0.4) +
ggplot2::scale_fill_manual(values = c("#E64B35FF", "white")) +
ggplot2::theme_classic() +
ggplot2::labs(title = "Interproscan presence",
subtitle = "Ectocarpus female sqrt TPM",
fill = "IPS term",
x = "Top TAI contributors?",
y = "Percentage (%)")
## `summarise()` has grouped output by 'pTAI500'. You can override using the
## `.groups` argument.
# ggplot2::ggsave(filename = "pTAI_plots/Ec25f_multi_IPR_presence.pdf",
# device = "pdf", width = 3, height = 3)
multicellular: 51.0% in males with IPR
multicellular: 54.7% in females with IPR
In contrast to the GO terms, multicellular stages tend to have less IPR domain annotation.
library(topGO)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:stringr':
##
## boundary
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:lubridate':
##
## second, second<-
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:philentropy':
##
## distance
## The following object is masked from 'package:lubridate':
##
## %within%
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
##
## members
createGOdata <- function(geneList, ontology, gene2GO_map, nodeSize){
OUT <-
methods::new(
"topGOdata",
description = paste("GOtest", ontology, sep = "_"),
ontology = ontology,
allGenes = geneList,
annot = annFUN.gene2GO,
gene2GO = gene2GO_map,
nodeSize = nodeSize)
return(OUT)
}
processedGOdata() wraps around my function
createGOdata()
processedGOdata <-
function(DEG_contrasts, GO_data, GO_gene_mapping, ontology, nodeSize = 10, padj_threshold = 0.05){
gene_background <-
DEG_contrasts %>%
tidyr::drop_na()
gene_query <-
dplyr::filter(gene_background, padj < padj_threshold)
geneList <-
factor(as.integer(gene_background$GeneID %in% gene_query$GeneID))
names(geneList) <- gene_background$GeneID
myGOdata <-
createGOdata(
geneList,
ontology = ontology,
gene2GO_map = GO_gene_mapping,
nodeSize = nodeSize
)
return(myGOdata)
}
To use processedGOdata()
myGOdata_CC <-
processedGOdata(
DEG_contrasts = dplyr::filter(Fs_DEG_contrasts, contrast == "48H vs 24H"),
GO_data = GO_Fs,
GO_gene_mapping = GO_Fs_gene_mapping,
ontology = "CC")
resultGO() plots or outputs the significant GO
terms.
resultGO <- function(GOdata, algorithm, firstSigNodes = 5, sig = F, plot = F, fisher_adjusted_threshold = 0.1){
## Calculate fisher exact test
result_fisher <-
topGO::runTest(
object = GOdata,
algorithm = algorithm,
statistic = "fisher")
OUT <-
topGO::GenTable(
object = GOdata,
fisher = result_fisher,
topNodes = length(score(result_fisher)))
## Correct for multiple testing:
OUT$fisher <- as.numeric(OUT$fisher)
OUT$fisher_adjusted <-
stats::p.adjust(p = OUT$fisher, method = c("fdr"))
if(sig == F & plot == F){
return(OUT)
}
## Subset calls with significance higher than expected:
if(plot == F){
OUT_sig <-
OUT %>%
dplyr::filter(fisher_adjusted < fisher_adjusted_threshold) %>%
dplyr::select(GO.ID, Term, Significant, Expected, fisher)
return(OUT_sig)
}
plot_OUT <-
topGO::showSigOfNodes(
GOdata,
score(result_fisher),
firstSigNodes = firstSigNodes,
useInfo ='all')
return(plot_OUT)
}
# also possible
# resGO_sig <- resultGO(myGOdata, "weight01", firstSigNodes = 10, sig = T)
The script above were motivated three blogposts:
For the GO analysis I used Fisher’s exact test. This is because it is more flexible. It is flexible because it requires a simpler input and assumes just two classes of genes - DEG (or nonDEG) and background. Node size 5 is recommended. Here are the top level GO categories:
CC = Cellular Compartment MF = Molecular Function BP = Biological Process
GO_Ec_gene_mapping <- base::split(GO_Ec$GO_term, GO_Ec$GeneID)
gene_background_unique <- unique(Ec_PES_25f$GeneID)
geneList <-
factor(as.integer(gene_background_unique %in% Ec_PES_25f.sqrt_unicell$GeneID))
names(geneList) <- gene_background_unique
myGOdata_CC <-
createGOdata(
geneList,
ontology = "CC",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 251 GO terms found. )
##
## Build GO DAG topology ..........
## ( 454 GO terms and 807 relations. )
##
## Annotating nodes ...............
## ( 1439 genes annotated to the GO terms. )
myGOdata_BP <-
createGOdata(
geneList,
ontology = "BP",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 750 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1967 GO terms and 3984 relations. )
##
## Annotating nodes ...............
## ( 3205 genes annotated to the GO terms. )
myGOdata_MF <-
createGOdata(
geneList,
ontology = "MF",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 929 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1319 GO terms and 1695 relations. )
##
## Annotating nodes ...............
## ( 5095 genes annotated to the GO terms. )
resGO_CC <-
resultGO(myGOdata_CC, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 5 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 4: 1 nodes to be scored.
##
## Level 3: 1 nodes to be scored.
##
## Level 2: 2 nodes to be scored.
resGO_BP <-
resultGO(myGOdata_BP, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 9 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 7: 1 nodes to be scored.
##
## Level 6: 1 nodes to be scored.
##
## Level 5: 1 nodes to be scored.
##
## Level 4: 1 nodes to be scored.
##
## Level 3: 3 nodes to be scored.
##
## Level 2: 1 nodes to be scored.
resGO_MF <-
resultGO(myGOdata_MF, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 22 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 7: 1 nodes to be scored.
##
## Level 6: 3 nodes to be scored.
##
## Level 5: 3 nodes to be scored.
##
## Level 4: 5 nodes to be scored.
##
## Level 3: 7 nodes to be scored.
##
## Level 2: 2 nodes to be scored.
resGO_CC; resGO_BP; resGO_MF
gene_background_unique <- unique(Ec_PES_32m$GeneID)
geneList <-
factor(as.integer(gene_background_unique %in% Ec_PES_32m.sqrt_unicell$GeneID))
names(geneList) <- gene_background_unique
myGOdata_CC <-
createGOdata(
geneList,
ontology = "CC",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 251 GO terms found. )
##
## Build GO DAG topology ..........
## ( 454 GO terms and 807 relations. )
##
## Annotating nodes ...............
## ( 1439 genes annotated to the GO terms. )
myGOdata_BP <-
createGOdata(
geneList,
ontology = "BP",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 750 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1967 GO terms and 3984 relations. )
##
## Annotating nodes ...............
## ( 3205 genes annotated to the GO terms. )
myGOdata_MF <-
createGOdata(
geneList,
ontology = "MF",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 929 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1319 GO terms and 1695 relations. )
##
## Annotating nodes ...............
## ( 5095 genes annotated to the GO terms. )
resGO_CC <-
resultGO(myGOdata_CC, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 6 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 4: 1 nodes to be scored.
##
## Level 3: 2 nodes to be scored.
##
## Level 2: 2 nodes to be scored.
resGO_BP <-
resultGO(myGOdata_BP, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 21 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 7: 1 nodes to be scored.
##
## Level 6: 3 nodes to be scored.
##
## Level 5: 4 nodes to be scored.
##
## Level 4: 5 nodes to be scored.
##
## Level 3: 5 nodes to be scored.
##
## Level 2: 2 nodes to be scored.
resGO_MF <-
resultGO(myGOdata_MF, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 22 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 7: 1 nodes to be scored.
##
## Level 6: 3 nodes to be scored.
##
## Level 5: 3 nodes to be scored.
##
## Level 4: 5 nodes to be scored.
##
## Level 3: 7 nodes to be scored.
##
## Level 2: 2 nodes to be scored.
resGO_CC; resGO_BP; resGO_MF
gene_background_unique <- unique(Ec_PES_25f$GeneID)
geneList <-
factor(as.integer(gene_background_unique %in% Ec_PES_25f.sqrt_multicell$GeneID))
names(geneList) <- gene_background_unique
myGOdata_CC <-
createGOdata(
geneList,
ontology = "CC",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 251 GO terms found. )
##
## Build GO DAG topology ..........
## ( 454 GO terms and 807 relations. )
##
## Annotating nodes ...............
## ( 1439 genes annotated to the GO terms. )
myGOdata_BP <-
createGOdata(
geneList,
ontology = "BP",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 750 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1967 GO terms and 3984 relations. )
##
## Annotating nodes ...............
## ( 3205 genes annotated to the GO terms. )
myGOdata_MF <-
createGOdata(
geneList,
ontology = "MF",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 929 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1319 GO terms and 1695 relations. )
##
## Annotating nodes ...............
## ( 5095 genes annotated to the GO terms. )
resGO_CC <-
resultGO(myGOdata_CC, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 35 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 10: 1 nodes to be scored.
##
## Level 9: 1 nodes to be scored.
##
## Level 8: 2 nodes to be scored.
##
## Level 7: 3 nodes to be scored.
##
## Level 6: 5 nodes to be scored.
##
## Level 5: 7 nodes to be scored.
##
## Level 4: 7 nodes to be scored.
##
## Level 3: 6 nodes to be scored.
##
## Level 2: 2 nodes to be scored.
resGO_BP <-
resultGO(myGOdata_BP, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 112 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 11: 2 nodes to be scored.
##
## Level 10: 6 nodes to be scored.
##
## Level 9: 8 nodes to be scored.
##
## Level 8: 6 nodes to be scored.
##
## Level 7: 11 nodes to be scored.
##
## Level 6: 21 nodes to be scored.
##
## Level 5: 26 nodes to be scored.
##
## Level 4: 17 nodes to be scored.
##
## Level 3: 11 nodes to be scored.
##
## Level 2: 3 nodes to be scored.
resGO_MF <-
resultGO(myGOdata_MF, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 68 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 9: 2 nodes to be scored.
##
## Level 8: 6 nodes to be scored.
##
## Level 7: 10 nodes to be scored.
##
## Level 6: 11 nodes to be scored.
##
## Level 5: 12 nodes to be scored.
##
## Level 4: 11 nodes to be scored.
##
## Level 3: 9 nodes to be scored.
##
## Level 2: 6 nodes to be scored.
resGO_CC; resGO_BP; resGO_MF
gene_background_unique <- unique(Ec_PES_32m$GeneID)
geneList <-
factor(as.integer(gene_background_unique %in% Ec_PES_32m.sqrt_multicell$GeneID))
names(geneList) <- gene_background_unique
myGOdata_CC <-
createGOdata(
geneList,
ontology = "CC",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 251 GO terms found. )
##
## Build GO DAG topology ..........
## ( 454 GO terms and 807 relations. )
##
## Annotating nodes ...............
## ( 1439 genes annotated to the GO terms. )
myGOdata_BP <-
createGOdata(
geneList,
ontology = "BP",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 750 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1967 GO terms and 3984 relations. )
##
## Annotating nodes ...............
## ( 3205 genes annotated to the GO terms. )
myGOdata_MF <-
createGOdata(
geneList,
ontology = "MF",
gene2GO_map = GO_Ec_gene_mapping,
nodeSize = 10
)
##
## Building most specific GOs .....
## ( 929 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1319 GO terms and 1695 relations. )
##
## Annotating nodes ...............
## ( 5095 genes annotated to the GO terms. )
resGO_CC <-
resultGO(myGOdata_CC, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 35 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 10: 1 nodes to be scored.
##
## Level 9: 1 nodes to be scored.
##
## Level 8: 2 nodes to be scored.
##
## Level 7: 3 nodes to be scored.
##
## Level 6: 5 nodes to be scored.
##
## Level 5: 7 nodes to be scored.
##
## Level 4: 7 nodes to be scored.
##
## Level 3: 6 nodes to be scored.
##
## Level 2: 2 nodes to be scored.
resGO_BP <-
resultGO(myGOdata_BP, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 112 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 11: 2 nodes to be scored.
##
## Level 10: 6 nodes to be scored.
##
## Level 9: 8 nodes to be scored.
##
## Level 8: 6 nodes to be scored.
##
## Level 7: 11 nodes to be scored.
##
## Level 6: 21 nodes to be scored.
##
## Level 5: 26 nodes to be scored.
##
## Level 4: 17 nodes to be scored.
##
## Level 3: 11 nodes to be scored.
##
## Level 2: 3 nodes to be scored.
resGO_MF <-
resultGO(myGOdata_MF, algorithm = "parentchild", sig = T)
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 68 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 9: 2 nodes to be scored.
##
## Level 8: 6 nodes to be scored.
##
## Level 7: 10 nodes to be scored.
##
## Level 6: 11 nodes to be scored.
##
## Level 5: 12 nodes to be scored.
##
## Level 4: 11 nodes to be scored.
##
## Level 3: 9 nodes to be scored.
##
## Level 2: 6 nodes to be scored.
resGO_CC; resGO_BP; resGO_MF
As expected from the “de-enrichment” of GO terms in unicellular stages, we can only obtain GO terms significantly enriched from the top contributors to the TAI in the multicellular stages.
Get session info.
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.2 (2022-10-31)
## os macOS Big Sur ... 10.16
## system x86_64, darwin17.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Europe/Berlin
## date 2024-03-21
## pandoc 2.17.1.1 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## abind 1.4-5 2016-07-21 [1] CRAN (R 4.2.0)
## annotate 1.74.0 2022-04-26 [1] Bioconductor
## AnnotationDbi * 1.58.0 2022-04-26 [1] Bioconductor
## backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
## Biobase * 2.58.0 2022-11-01 [1] Bioconductor
## BiocGenerics * 0.44.0 2022-11-01 [1] Bioconductor
## BiocParallel 1.30.4 2022-10-13 [1] Bioconductor
## Biostrings 2.64.1 2022-08-18 [1] Bioconductor
## bit 4.0.5 2022-11-15 [1] CRAN (R 4.2.0)
## bit64 4.0.5 2020-08-30 [1] CRAN (R 4.2.0)
## bitops 1.0-7 2021-04-24 [1] CRAN (R 4.2.0)
## blob 1.2.4 2023-03-17 [1] CRAN (R 4.2.0)
## broom 1.0.5 2023-06-09 [1] CRAN (R 4.2.0)
## bslib 0.5.0 2023-06-09 [1] CRAN (R 4.2.0)
## cachem 1.0.8 2023-05-01 [1] CRAN (R 4.2.0)
## callr 3.7.3 2022-11-02 [1] CRAN (R 4.2.0)
## car 3.1-2 2023-03-30 [1] CRAN (R 4.2.0)
## carData 3.0-5 2022-01-06 [1] CRAN (R 4.2.0)
## cli 3.6.1 2023-03-23 [1] CRAN (R 4.2.0)
## codetools 0.2-19 2023-02-01 [1] CRAN (R 4.2.0)
## colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.2.0)
## cowplot 1.1.1 2020-12-30 [1] CRAN (R 4.2.0)
## crayon 1.5.2 2022-09-29 [1] CRAN (R 4.2.0)
## data.table 1.14.8 2023-02-17 [1] CRAN (R 4.2.0)
## DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.0)
## DelayedArray 0.24.0 2022-11-01 [1] Bioconductor
## DESeq2 1.36.0 2022-04-26 [1] Bioconductor
## devtools 2.4.5 2022-10-11 [1] CRAN (R 4.2.0)
## digest 0.6.33 2023-07-07 [1] CRAN (R 4.2.0)
## dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.2.0)
## dtplyr 1.3.1 2023-03-22 [1] CRAN (R 4.2.0)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0)
## evaluate 0.21 2023-05-05 [1] CRAN (R 4.2.0)
## fansi 1.0.4 2023-01-22 [1] CRAN (R 4.2.0)
## farver 2.1.1 2022-07-06 [1] CRAN (R 4.2.0)
## fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.2.0)
## fitdistrplus 1.1-11 2023-04-25 [1] CRAN (R 4.2.0)
## forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.2.0)
## foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0)
## fs 1.6.3 2023-07-20 [1] CRAN (R 4.2.0)
## genefilter 1.78.0 2022-04-26 [1] Bioconductor
## geneplotter 1.74.0 2022-04-26 [1] Bioconductor
## generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0)
## GenomeInfoDb 1.34.9 2023-02-02 [1] Bioconductor
## GenomeInfoDbData 1.2.9 2023-06-13 [1] Bioconductor
## GenomicRanges 1.50.2 2022-12-16 [1] Bioconductor
## ggfortify * 0.4.16 2023-03-20 [1] CRAN (R 4.2.0)
## ggplot2 * 3.4.3 2023-08-14 [1] CRAN (R 4.2.0)
## ggpubr 0.6.0 2023-02-10 [1] CRAN (R 4.2.0)
## ggsci 3.0.0 2023-03-08 [1] CRAN (R 4.2.0)
## ggsignif 0.6.4 2022-10-13 [1] CRAN (R 4.2.0)
## glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
## GO.db * 3.15.0 2022-08-31 [1] Bioconductor
## graph * 1.74.0 2022-04-26 [1] Bioconductor
## gridExtra 2.3 2017-09-09 [1] CRAN (R 4.2.0)
## gtable 0.3.3 2023-03-21 [1] CRAN (R 4.2.0)
## highr 0.10 2022-12-22 [1] CRAN (R 4.2.0)
## hms 1.1.3 2023-03-21 [1] CRAN (R 4.2.0)
## htmltools 0.5.7 2023-11-03 [1] CRAN (R 4.2.0)
## htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.2.0)
## httpuv 1.6.11 2023-05-11 [1] CRAN (R 4.2.0)
## httr 1.4.6 2023-05-08 [1] CRAN (R 4.2.0)
## insight 0.19.2 2023-05-23 [1] CRAN (R 4.2.0)
## IRanges * 2.32.0 2022-11-01 [1] Bioconductor
## iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.0)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.2.0)
## jsonlite 1.8.7 2023-06-29 [1] CRAN (R 4.2.0)
## KEGGREST 1.36.3 2022-07-14 [1] Bioconductor
## knitr 1.43 2023-05-25 [1] CRAN (R 4.2.0)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.2.0)
## later 1.3.1 2023-05-02 [1] CRAN (R 4.2.0)
## lattice 0.21-8 2023-04-05 [1] CRAN (R 4.2.0)
## lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.0)
## locfit 1.5-9.8 2023-06-11 [1] CRAN (R 4.2.2)
## lubridate * 1.9.2 2023-02-10 [1] CRAN (R 4.2.0)
## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
## MASS 7.3-60 2023-05-04 [1] CRAN (R 4.2.0)
## Matrix 1.6-0 2023-07-08 [1] CRAN (R 4.2.2)
## MatrixGenerics 1.10.0 2022-11-01 [1] Bioconductor
## matrixStats 1.0.0 2023-06-02 [1] CRAN (R 4.2.0)
## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.2.0)
## mgcv 1.8-42 2023-03-02 [1] CRAN (R 4.2.0)
## mime 0.12 2021-09-28 [1] CRAN (R 4.2.0)
## miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.2.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0)
## myTAI * 1.0.1.9000 2023-08-18 [1] Github (drostlab/myTAI@59a95dd)
## nlme 3.1-162 2023-01-31 [1] CRAN (R 4.2.0)
## philentropy * 0.7.0 2022-11-05 [1] CRAN (R 4.2.0)
## pillar 1.9.0 2023-03-22 [1] CRAN (R 4.2.0)
## pkgbuild 1.4.0 2022-11-27 [1] CRAN (R 4.2.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0)
## pkgload 1.3.2.1 2023-07-08 [1] CRAN (R 4.2.0)
## plyr 1.8.8 2022-11-11 [1] CRAN (R 4.2.0)
## png 0.1-8 2022-11-29 [1] CRAN (R 4.2.0)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.2.0)
## processx 3.8.2 2023-06-30 [1] CRAN (R 4.2.0)
## profvis 0.3.8 2023-05-02 [1] CRAN (R 4.2.0)
## promises 1.2.0.1 2021-02-11 [1] CRAN (R 4.2.0)
## ps 1.7.5 2023-04-18 [1] CRAN (R 4.2.0)
## purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.2.0)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0)
## ragg 1.2.5 2023-01-12 [1] CRAN (R 4.2.0)
## RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.2.0)
## Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.2.0)
## RCurl 1.98-1.12 2023-03-27 [1] CRAN (R 4.2.0)
## readr * 2.1.4 2023-02-10 [1] CRAN (R 4.2.0)
## remotes 2.4.2 2021-11-30 [1] CRAN (R 4.2.0)
## reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.2.0)
## rlang 1.1.1 2023-04-28 [1] CRAN (R 4.2.0)
## rmarkdown 2.22 2023-06-01 [1] CRAN (R 4.2.0)
## RSQLite 2.3.1 2023-04-03 [1] CRAN (R 4.2.0)
## rstatix 0.7.2 2023-02-01 [1] CRAN (R 4.2.0)
## rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.0)
## S4Vectors * 0.36.2 2023-02-26 [1] Bioconductor
## sass 0.4.6 2023-05-03 [1] CRAN (R 4.2.0)
## scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.0)
## see * 0.8.0 2023-06-05 [1] CRAN (R 4.2.0)
## sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0)
## shiny 1.7.4 2022-12-15 [1] CRAN (R 4.2.0)
## SparseM * 1.81 2021-02-18 [1] CRAN (R 4.2.0)
## stringi 1.7.12 2023-01-11 [1] CRAN (R 4.2.0)
## stringr * 1.5.0 2022-12-02 [1] CRAN (R 4.2.0)
## SummarizedExperiment 1.28.0 2022-11-01 [1] Bioconductor
## survival 3.5-5 2023-03-12 [1] CRAN (R 4.2.2)
## systemfonts 1.0.4 2022-02-11 [1] CRAN (R 4.2.0)
## textshaping 0.3.6 2021-10-13 [1] CRAN (R 4.2.0)
## tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.2.0)
## tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.2.0)
## tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.0)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.2.0)
## timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.0)
## topGO * 2.48.0 2022-04-26 [1] Bioconductor
## tximport * 1.24.0 2022-04-26 [1] Bioconductor
## tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.2.0)
## urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.2.0)
## usethis 2.2.0 2023-06-06 [1] CRAN (R 4.2.0)
## utf8 1.2.3 2023-01-31 [1] CRAN (R 4.2.0)
## vctrs 0.6.3 2023-06-14 [1] CRAN (R 4.2.0)
## vroom 1.6.3 2023-04-28 [1] CRAN (R 4.2.0)
## withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0)
## xfun 0.39 2023-04-20 [1] CRAN (R 4.2.0)
## XML 3.99-0.14 2023-03-19 [1] CRAN (R 4.2.0)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.2.0)
## XVector 0.38.0 2022-11-01 [1] Bioconductor
## yaml 2.3.7 2023-01-23 [1] CRAN (R 4.2.0)
## zlibbioc 1.44.0 2022-11-01 [1] Bioconductor
##
## [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
##
## ──────────────────────────────────────────────────────────────────────────────